From 935e35d2b9f889566207b76a7026b63a1619742c Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Wed, 9 Jan 2013 00:13:41 +0000
Subject: Consider expression "0.0 - X" as the negation of X if
 - this expression is explicitly marked no-signed-zero, or
 - no-signed-zero of this expression can be derived from some context.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171922 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Constants.cpp                                | 9 +++++++++
 lib/IR/Instructions.cpp                             | 9 ++++++---
 lib/Transforms/InstCombine/InstCombine.h            | 2 +-
 lib/Transforms/InstCombine/InstructionCombining.cpp | 4 ++--
 4 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 4b58599..812692f 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -51,6 +51,15 @@ bool Constant::isNegativeZeroValue() const {
   return isNullValue();
 }
 
+bool Constant::isZeroValue() const {
+  // Floating point values have an explicit -0.0 value.
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+    return CFP->isZero();
+
+  // Otherwise, just use +0.0.
+  return isNullValue();
+}
+
 bool Constant::isNullValue() const {
   // 0 is null.
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 1b5d004..f2e9813 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1926,11 +1926,14 @@ bool BinaryOperator::isNeg(const Value *V) {
   return false;
 }
 
-bool BinaryOperator::isFNeg(const Value *V) {
+bool BinaryOperator::isFNeg(const Value *V, bool IgnoreZeroSign) {
   if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
     if (Bop->getOpcode() == Instruction::FSub)
-      if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
-        return C->isNegativeZeroValue();
+      if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0))) {
+        if (!IgnoreZeroSign)
+          IgnoreZeroSign = cast<FPMathOperator>(V)->hasNoSignedZeros();
+        return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue();
+      }
   return false;
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 959daa2..a36b1e6 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -211,7 +211,7 @@ public:
 private:
   bool ShouldChangeType(Type *From, Type *To) const;
   Value *dyn_castNegVal(Value *V) const;
-  Value *dyn_castFNegVal(Value *V) const;
+  Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
   Type *FindElementAtOffset(Type *Ty, int64_t Offset,
                             SmallVectorImpl<Value*> &NewIndices);
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 6f24cdd..dc7fe5c 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -516,8 +516,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
 // instruction if the LHS is a constant negative zero (which is the 'negate'
 // form).
 //
-Value *InstCombiner::dyn_castFNegVal(Value *V) const {
-  if (BinaryOperator::isFNeg(V))
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+  if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
     return BinaryOperator::getFNegArgument(V);
 
   // Constants can be considered to be negated values if they can be folded.
-- cgit v1.1

From ff887165bc221c0398c0d4404dc0b22de216dedf Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 9 Jan 2013 00:32:08 +0000
Subject: Add the integer value of the ConstantInt instead of the Constant* value.

This is causing some problems. The root cause is unknown at this time.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171923 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/AttributeImpl.h | 7 +------
 lib/IR/Attributes.cpp  | 8 ++++++++
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 7bb666a..1164d68 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -65,12 +65,7 @@ public:
     Profile(ID, Data, Vals);
   }
   static void Profile(FoldingSetNodeID &ID, Constant *Data,
-                      ArrayRef<Constant*> Vals) {
-    ID.AddPointer(Data);
-    for (ArrayRef<Constant*>::iterator I = Vals.begin(), E = Vals.end();
-         I != E; ++I)
-      ID.AddPointer(*I);
-  }
+                      ArrayRef<Constant*> Vals);
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index bef7a6c..01a59a3 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -509,6 +509,14 @@ void AttributeImpl::setStackAlignment(unsigned Align) {
   Vals.push_back(ConstantInt::get(Type::getInt64Ty(Context), Align));
 }
 
+void Profile(FoldingSetNodeID &ID, Constant *Data,
+             ArrayRef<Constant*> Vals) {
+  ID.AddInteger(cast<ConstantInt>(Data)->getZExtValue());
+  for (ArrayRef<Constant*>::iterator I = Vals.begin(), E = Vals.end();
+       I != E; ++I)
+    ID.AddPointer(*I);
+}
+
 //===----------------------------------------------------------------------===//
 // AttributeSetImpl Definition
 //===----------------------------------------------------------------------===//
-- cgit v1.1

From 8456efb38e479e4878a6a782c3026c20b09c1f8e Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 9 Jan 2013 00:32:55 +0000
Subject: Forgot the namespace identifier.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171924 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Attributes.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 01a59a3..cdea350 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -509,8 +509,8 @@ void AttributeImpl::setStackAlignment(unsigned Align) {
   Vals.push_back(ConstantInt::get(Type::getInt64Ty(Context), Align));
 }
 
-void Profile(FoldingSetNodeID &ID, Constant *Data,
-             ArrayRef<Constant*> Vals) {
+void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data,
+                            ArrayRef<Constant*> Vals) {
   ID.AddInteger(cast<ConstantInt>(Data)->getZExtValue());
   for (ArrayRef<Constant*>::iterator I = Vals.begin(), E = Vals.end();
        I != E; ++I)
-- cgit v1.1

From c3d6de2fe52dbdbf41b1dfebb1430656a16b254b Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Wed, 9 Jan 2013 00:53:25 +0000
Subject: Add comment to the definition of Constant::isZeroValue().
 (There is already a concise comment at the declaration.)

Thanks to Eric Christopher for his feedback!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171926 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Constants.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 812692f..9327554 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -51,6 +51,8 @@ bool Constant::isNegativeZeroValue() const {
   return isNullValue();
 }
 
+// Return true iff this constant is positive zero (floating point), negative
+// zero (floating point), or a null value.
 bool Constant::isZeroValue() const {
   // Floating point values have an explicit -0.0 value.
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
-- cgit v1.1

From 83be7b0dd3ae9a3cb22d36ae4c1775972553b94b Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Wed, 9 Jan 2013 01:15:42 +0000
Subject: Cost Model: Move the 'max unroll factor' variable to the TTI and add
 initial Cost Model support on ARM.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171928 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/TargetTransformInfo.cpp       |  8 ++++++++
 lib/CodeGen/BasicTargetTransformInfo.cpp   |  5 +++++
 lib/Target/ARM/ARMTargetTransformInfo.cpp  | 25 +++++++++++++++++++++++++
 lib/Target/X86/X86TargetTransformInfo.cpp  | 15 +++++++++++++--
 lib/Transforms/Vectorize/LoopVectorize.cpp |  5 ++---
 5 files changed, 53 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 63f495a..02af2d3 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return PrevTTI->getNumberOfRegisters(Vector);
 }
 
+unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
+  return PrevTTI->getMaximumUnrollFactor();
+}
+
 unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                      Type *Ty) const {
   return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
@@ -216,6 +220,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return 8;
   }
 
+  unsigned getMaximumUnrollFactor() const {
+    return 1;
+  }
+
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
     return 1;
   }
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index c27e081..2f3ac9a 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -83,6 +83,7 @@ public:
   /// @{
 
   virtual unsigned getNumberOfRegisters(bool Vector) const;
+  virtual unsigned getMaximumUnrollFactor() const;
   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                                   int Index, Type *SubTp) const;
@@ -182,6 +183,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
   return 1;
 }
 
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+  return 1;
+}
+
 unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
   // Check if any of the operands are vector operands.
int ISD = TLI->InstructionOpcodeToISD(Opcode); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 03a23be..634004a 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -77,6 +77,31 @@ public: virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; /// @} + + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 16; + return 0; + } + + if (ST->isThumb1Only()) + return 8; + return 16; + } + + unsigned getMaximumUnrollFactor() const { + // These are out of order CPUs: + if (ST->isCortexA15() || ST->isSwift()) + return 2; + return 1; + } + + /// @} }; } // end anonymous namespace diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9cc1b18..6ab08cb 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -75,7 +75,6 @@ public: /// \name Scalar TTI Implementations /// @{ - virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; /// @} @@ -84,6 +83,7 @@ public: /// @{ virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; @@ -156,7 +156,6 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, return -1; } - X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -171,6 +170,18 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const { return 8; } +unsigned X86TTI::getMaximumUnrollFactor() const { + if (ST->isAtom()) + return 1; + + // Sandybridge and Haswell have multiple execution ports and pipelined + // vector units. + if (ST->hasAVX()) + return 4; + + return 2; +} + unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(Ty); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9c82cb8..c29f416 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -116,9 +116,6 @@ static const unsigned RuntimeMemoryCheckThreshold = 4; /// This is the highest vector width that we try to generate. static const unsigned MaxVectorSize = 8; -/// This is the highest Unroll Factor. -static const unsigned MaxUnrollSize = 4; - namespace { // Forward declarations. @@ -2715,6 +2712,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions)); // Clamp the unroll factor ranges to reasonable factors. + unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor(); + if (UF > MaxUnrollSize) UF = MaxUnrollSize; else if (UF < 1) -- cgit v1.1 From ca1dd05c3c12e857614ae6837f90894396225dd6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 9 Jan 2013 01:35:34 +0000 Subject: These functions have default arguments of 0 for the last arg. Use them and add one where it seemed obvious that we wanted one. 
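The cleanup pattern, shown as a minimal sketch (the MCStreamer calls below are taken from the diffs that follow; only the literal trailing 0 changes):

    // Before: the address-space argument, which already defaults to 0, is
    // spelled out at every call site.
    OutStreamer.EmitIntValue(Value, 4, 0 /*addrspace*/);
    // After: rely on the default argument instead.
    OutStreamer.EmitIntValue(Value, 4);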
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171932 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 20 ++++++++++---------- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 10 +++++----- lib/CodeGen/AsmPrinter/DIE.cpp | 4 ++-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12 +++++------- lib/CodeGen/AsmPrinter/DwarfException.cpp | 2 +- lib/MC/MCObjectStreamer.cpp | 2 +- 6 files changed, 24 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index aa64698..8a8c849 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1040,7 +1040,7 @@ void AsmPrinter::EmitConstantPool() { // Emit inter-object padding for alignment. unsigned AlignMask = CPE.getAlignment() - 1; unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; - OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/); + OutStreamer.EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty); @@ -1203,7 +1203,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout()); - OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); + OutStreamer.EmitValue(Value, EntrySize); } @@ -1326,19 +1326,19 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { /// EmitInt8 - Emit a byte directive and value. /// void AsmPrinter::EmitInt8(int Value) const { - OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/); + OutStreamer.EmitIntValue(Value, 1); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/); + OutStreamer.EmitIntValue(Value, 2); } /// EmitInt32 - Emit a long directive and value. /// void AsmPrinter::EmitInt32(int Value) const { - OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/); + OutStreamer.EmitIntValue(Value, 4); } /// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size @@ -1353,14 +1353,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutContext); if (!MAI->hasSetDirective()) { - OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/); + OutStreamer.EmitValue(Diff, Size); return; } // Otherwise, emit with .set (aka assignment). MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); + OutStreamer.EmitSymbolValue(SetLabel, Size); } /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" @@ -1384,12 +1384,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, OutContext); if (!MAI->hasSetDirective()) - OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); + OutStreamer.EmitValue(Diff, 4); else { // Otherwise, emit with .set (aka assignment). 
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); + OutStreamer.EmitSymbolValue(SetLabel, 4); } } @@ -1407,7 +1407,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, MCConstantExpr::Create(Offset, OutContext), OutContext); - OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/); + OutStreamer.EmitValue(Expr, Size); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index ece92d8..088622b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -58,7 +58,7 @@ void AsmPrinter::EmitCFAByte(unsigned Val) const { else OutStreamer.AddComment(dwarf::CallFrameString(Val)); } - OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/); + OutStreamer.EmitIntValue(Val, 1); } static const char *DecodeDWARFEncoding(unsigned Encoding) { @@ -102,7 +102,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { DecodeDWARFEncoding(Val)); } - OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/); + OutStreamer.EmitIntValue(Val, 1); } /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. @@ -126,9 +126,9 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const MCExpr *Exp = TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); - OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0); + OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else - OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding), 0); + OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); } /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its @@ -157,7 +157,7 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // If the section in question will end up with an address of 0 anyway, we can // just emit an absolute reference to save a relocation. if (Section.isBaseAddressKnownZero()) { - OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/); + OutStreamer.EmitSymbolValue(Label, 4); return; } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index a913ca4..fecb041 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -204,7 +204,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { Size = Asm->getDataLayout().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } - Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); + Asm->OutStreamer.EmitIntValue(Integer, Size); } /// SizeOf - Determine size of integer value in bytes. @@ -243,7 +243,7 @@ void DIEInteger::print(raw_ostream &O) { /// EmitValue - Emit label value. /// void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/); + AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); } /// SizeOf - Determine size of label value in bytes. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 66a5a6d..33d69d3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1942,8 +1942,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->OutStreamer.AddComment("Section end label"); Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getDataLayout().getPointerSize(), - 0/*AddrSpace*/); + Asm->getDataLayout().getPointerSize()); // Mark end of matrix. Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); @@ -2152,8 +2151,7 @@ void DwarfUnits::emitStrings(const MCSection *StrSection, // Emit the string itself with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), - Entries[i].second->getKeyLength()+1), - 0/*addrspace*/); + Entries[i].second->getKeyLength()+1)); } // If we've got an offset section go ahead and emit that now as well. @@ -2199,8 +2197,8 @@ void DwarfDebug::emitDebugLoc() { DotDebugLocEntry &Entry = *I; if (Entry.isMerged()) continue; if (Entry.isEmpty()) { - Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); - Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); @@ -2292,7 +2290,7 @@ void DwarfDebug::emitDebugRanges() { if (*I) Asm->OutStreamer.EmitSymbolValue(const_cast(*I), Size, 0); else - Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitIntValue(0, Size); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 975bb94..8e53900 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -608,7 +608,7 @@ void DwarfException::EmitExceptionTable() { if (!S.PadLabel) { if (VerboseAsm) Asm->OutStreamer.AddComment(" has no landing pad"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" jumps to ") + diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index d205a8c..6f2dce6 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -315,7 +315,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, if (!Delta->EvaluateAsAbsolute(Res, getAssembler())) return true; - EmitFill(Res, Value, 0); + EmitFill(Res, Value); return false; } -- cgit v1.1 From 68ca56285f9b6e82eb16ff8ea02a301f2c489fae Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 9 Jan 2013 01:57:54 +0000 Subject: These functions have default arguments of 0 for the last arg. Use them. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171933 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6 +++--- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2 +- lib/MC/MCDwarf.cpp | 34 +++++++++++++++---------------- lib/MC/MCParser/ELFAsmParser.cpp | 4 ++-- lib/Target/ARM/ARMAsmPrinter.cpp | 4 ++-- lib/Target/PowerPC/PPCAsmPrinter.cpp | 12 +++++------ lib/Target/X86/X86AsmPrinter.cpp | 16 +++++++-------- 8 files changed, 39 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 8a8c849..32df0e2 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -391,9 +391,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), - PtrSize, 0); - OutStreamer.EmitIntValue(0, PtrSize, 0); - OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); + PtrSize); + OutStreamer.EmitIntValue(0, PtrSize); + OutStreamer.EmitSymbolValue(MangSym, PtrSize); OutStreamer.AddBlankLine(); return; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 33d69d3..afe6901 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2113,7 +2113,7 @@ void DwarfDebug::emitDebugPubTypes() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index b802853..ec1c7a3 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -100,7 +100,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { EmitCamlGlobal(getModule(), AP, "data_end"); // FIXME: Why does ocaml emit this?? - AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0); + AP.OutStreamer.EmitIntValue(0, IntPtrSize); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(getModule(), AP, "frametable"); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index d53d2fc..5691822 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -269,8 +269,8 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) { const std::vector &MCDwarfDirs = context.getMCDwarfDirs(); for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { - MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName - MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string + MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName + MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string } MCOS->EmitIntValue(0, 1); // Terminate the directory list @@ -278,8 +278,8 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) { const std::vector &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { - MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName - MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string + MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName + MCOS->EmitBytes(StringRef("\0", 1)); // the null term. 
of the string // the Directory num MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex()); MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0) @@ -342,7 +342,7 @@ void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta, SmallString<256> Tmp; raw_svector_ostream OS(Tmp); MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS); - MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0); + MCOS->EmitBytes(OS.str()); } /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. @@ -618,29 +618,29 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const std::vector &MCDwarfDirs = context.getMCDwarfDirs(); if (MCDwarfDirs.size() > 0) { - MCOS->EmitBytes(MCDwarfDirs[0], 0); - MCOS->EmitBytes("/", 0); + MCOS->EmitBytes(MCDwarfDirs[0]); + MCOS->EmitBytes("/"); } const std::vector &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(); - MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0); + MCOS->EmitBytes(MCDwarfFiles[1]->getName()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_comp_dir, the working directory the assembly was done in. - MCOS->EmitBytes(context.getCompilationDir(), 0); + MCOS->EmitBytes(context.getCompilationDir()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_APPLE_flags, the command line arguments of the assembler tool. StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); if (!DwarfDebugFlags.empty()){ - MCOS->EmitBytes(DwarfDebugFlags, 0); + MCOS->EmitBytes(DwarfDebugFlags); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. } // AT_producer, the version of the assembler tool. - MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0); - MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0); - MCOS->EmitBytes(StringRef(")"), 0); + MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM ")); + MCOS->EmitBytes(StringRef(PACKAGE_VERSION)); + MCOS->EmitBytes(StringRef(")")); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_language, a 4 byte value. We use DW_LANG_Mips_Assembler as the dwarf2 @@ -661,7 +661,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitULEB128IntValue(2); // AT_name, of the label without any leading underbar. - MCOS->EmitBytes(Entry->getName(), 0); + MCOS->EmitBytes(Entry->getName()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_decl_file, index into the file table. 
@@ -1071,7 +1071,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, } case MCCFIInstruction::OpEscape: if (VerboseAsm) Streamer.AddComment("Escape bytes"); - Streamer.EmitBytes(Instr.getValues(), 0); + Streamer.EmitBytes(Instr.getValues()); return; } llvm_unreachable("Unhandled case in switch"); @@ -1229,7 +1229,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, Augmentation += "R"; if (IsSignalFrame) Augmentation += "S"; - streamer.EmitBytes(Augmentation.str(), 0); + streamer.EmitBytes(Augmentation.str()); } streamer.EmitIntValue(0, 1); @@ -1493,7 +1493,7 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, SmallString<256> Tmp; raw_svector_ostream OS(Tmp); MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS); - Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0); + Streamer.EmitBytes(OS.str()); } void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta, diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index d55de1f..87126f0 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -517,7 +517,7 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { getStreamer().EmitIntValue(0, 1); SeenIdent = true; } - getStreamer().EmitBytes(Data, 0); + getStreamer().EmitBytes(Data); getStreamer().EmitIntValue(0, 1); getStreamer().PopSection(); return false; @@ -569,7 +569,7 @@ bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) { getStreamer().EmitIntValue(Data.size()+1, 4); // namesz. getStreamer().EmitIntValue(0, 4); // descsz = 0 (no description). getStreamer().EmitIntValue(1, 4); // type = NT_VERSION. - getStreamer().EmitBytes(Data, 0); // name. + getStreamer().EmitBytes(Data); // name. getStreamer().EmitIntValue(0, 1); // terminate the string. getStreamer().EmitValueToAlignment(4); // ensure 4 byte alignment. getStreamer().PopSection(); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index fc6ac90..d66b299 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -182,7 +182,7 @@ namespace { const size_t TagHeaderSize = 1 + 4; Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(CurrentVendor, 0); + Streamer.EmitBytes(CurrentVendor); Streamer.EmitIntValue(0, 1); // '\0' Streamer.EmitIntValue(ARMBuildAttrs::File, 1); @@ -199,7 +199,7 @@ namespace { Streamer.EmitULEB128IntValue(item.IntValue, 0); break; case AttributeItemType::TextAttribute: - Streamer.EmitBytes(item.StringValue.upper(), 0); + Streamer.EmitBytes(item.StringValue.upper()); Streamer.EmitIntValue(0, 1); // '\0' break; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index adb673b..4319894 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -732,14 +732,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - 8/*size*/, 0/*addrspace*/); + 8 /*size*/); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOC, OutContext), - 8/*size*/, 0/*addrspace*/); + 8/*size*/); // Emit a null environment pointer. 
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); + OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -1031,7 +1031,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/); else // Internal to current translation unit. // @@ -1041,7 +1041,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); @@ -1060,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 5b3e0ba..75fa9d2 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -543,7 +543,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MCSA_IndirectSymbol); // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; - OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); + OutStreamer.EmitBytes(StringRef(HltInsts, 5)); } Stubs.clear(); @@ -569,7 +569,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long 0 if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -578,8 +578,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // using NLPs. However, sometimes the types are local to the file. So // we need to fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -596,8 +595,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long _foo OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -663,7 +661,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { name += ",DATA"; else name += ",data"; - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { @@ -672,7 +670,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { else name = " -export:"; name += DLLExportedFns[i]->getName(); - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } } } @@ -692,7 +690,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(), 0); + TD->getPointerSize()); } Stubs.clear(); } -- cgit v1.1 From 9302dcc91458f9d6c8005934f1180ead4427aaba Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 9 Jan 2013 02:11:55 +0000 Subject: docs: Bring TableGen syntax a bit closer to reality. It's not just def's but actually a limited subset of Object's that are allowed inside a multiclass. 
Spotted by Joel Jones. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171935 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 17f0abc..89299d7 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -2160,7 +2160,12 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) { /// ParseMultiClass - Parse a multiclass definition. /// /// MultiClassInst ::= MULTICLASS ID TemplateArgList? -/// ':' BaseMultiClassList '{' MultiClassDef+ '}' +/// ':' BaseMultiClassList '{' MultiClassObject+ '}' +/// MultiClassObject ::= DefInst +/// MultiClassObject ::= MultiClassInst +/// MultiClassObject ::= DefMInst +/// MultiClassObject ::= LETCommand '{' ObjectList '}' +/// MultiClassObject ::= LETCommand Object /// bool TGParser::ParseMultiClass() { assert(Lex.getCode() == tgtok::MultiClass && "Unexpected token"); -- cgit v1.1 From 36febfd70e1a28d6008d45a753da4c75fd994140 Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 9 Jan 2013 02:11:57 +0000 Subject: fix copy-paste-o git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 89299d7..a600a13 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -443,13 +443,13 @@ Record *TGParser::ParseClassID() { /// MultiClass *TGParser::ParseMultiClassID() { if (Lex.getCode() != tgtok::Id) { - TokError("expected name for ClassID"); + TokError("expected name for MultiClassID"); return 0; } MultiClass *Result = MultiClasses[Lex.getCurStrVal()]; if (Result == 0) - TokError("Couldn't find class '" + Lex.getCurStrVal() + "'"); + TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'"); Lex.Lex(); return Result; -- cgit v1.1 From 9d4a6610765c3466642397299271ae904d0d73f9 Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 9 Jan 2013 02:17:13 +0000 Subject: tblgen: Reuse function that is 2 lines above. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171937 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index a600a13..b106700 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -456,18 +456,9 @@ MultiClass *TGParser::ParseMultiClassID() { } Record *TGParser::ParseDefmID() { - if (Lex.getCode() != tgtok::Id) { - TokError("expected multiclass name"); - return 0; - } - - MultiClass *MC = MultiClasses[Lex.getCurStrVal()]; - if (MC == 0) { - TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'"); + MultiClass *MC = ParseMultiClassID(); + if (!MC) return 0; - } - - Lex.Lex(); return &MC->Rec; } -- cgit v1.1 From 7be9021754559956f7a079d6063b58a36e5bf2fb Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 9 Jan 2013 02:17:14 +0000 Subject: Inline this into its only caller. It's clearer and additionally this gets rid of the usage of `DefmID`, which doesn't really correspond to anything in the language (it was just used in the name of this parsing function which parsed a `MultiClassID` and returned that multiclass's record). This area of the code still needs a lot of work. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171938 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 16 +++++----------- lib/TableGen/TGParser.h | 1 - 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index b106700..b6c8092 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -455,14 +455,6 @@ MultiClass *TGParser::ParseMultiClassID() { return Result; } -Record *TGParser::ParseDefmID() { - MultiClass *MC = ParseMultiClassID(); - if (!MC) - return 0; - return &MC->Rec; -} - - /// ParseSubClassReference - Parse a reference to a subclass or to a templated /// subclass. This returns a SubClassRefTy with a null Record* on error. /// @@ -474,10 +466,12 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { SubClassReference Result; Result.RefLoc = Lex.getLoc(); - if (isDefm) - Result.Rec = ParseDefmID(); - else + if (isDefm) { + if (MultiClass *MC = ParseMultiClassID()) + Result.Rec = &MC->Rec; + } else { Result.Rec = ParseClassID(); + } if (Result.Rec == 0) return Result; // If there is no template arg list, we're done. diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index 0ea962b..70fc9df 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -183,7 +183,6 @@ private: // Parser methods. Init *ParseObjectName(MultiClass *CurMultiClass); Record *ParseClassID(); MultiClass *ParseMultiClassID(); - Record *ParseDefmID(); }; } // end namespace llvm -- cgit v1.1 From 2af949ddddfaf2feb4a446c754e09d2d8c207ce4 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 9 Jan 2013 02:45:05 +0000 Subject: [Object, DebugInfo] Make DWARFContext BE-aware. test/DebugInfo/member-pointers.ll would not fail in targetting BE any more. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171943 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFContext.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index d6d9fcf..247ee5b 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -352,7 +352,7 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, } DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : - IsLittleEndian(true /* FIXME */) { + IsLittleEndian(Obj->isLittleEndian()) { error_code ec; for (object::section_iterator i = Obj->begin_sections(), e = Obj->end_sections(); -- cgit v1.1 From 47579cf390c42e0577519e0a2b6044baece9df00 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 9 Jan 2013 03:36:49 +0000 Subject: MIsched: add an ILP window property to machine model. This was an experimental option, but needs to be defined per-target. e.g. PPC A2 needs to aggressively hide latency. I converted some in-order scheduling tests to A2. Hal is working on more test cases. 
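Concretely, the scheduler now clamps its latency heuristic against a per-target window. A sketch of the check this patch installs (it mirrors the MachineScheduler hunk below; the cycle numbers in the comments are illustrative, not from the patch):

    // With Rem->CriticalPath = 20 and a target ILPWindow of 10, latency
    // reduction only kicks in once the remaining latency plus the expected
    // latency reaches 20 + 10 = 30 cycles.
    unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
    if (RemLatency + ExpectedLatency >= CriticalPathLimit &&
        RemLatency > Rem->getMaxRemainingCount(SchedModel))
      Policy.ReduceLatency = true;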
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171946 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 12 ++---------- lib/Target/ARM/ARMScheduleA9.td | 3 +++ lib/Target/X86/X86Schedule.td | 5 +++++ lib/Target/X86/X86ScheduleAtom.td | 1 + 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 117b2bd..a32df78 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -48,15 +48,6 @@ static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Threshold to very roughly model an out-of-order processor's instruction -// buffers. If the actual value of this threshold matters much in practice, then -// it can be specified by the machine model. For now, it's an experimental -// tuning knob to determine when and if it matters. -static cl::opt ILPWindow("ilp-window", cl::Hidden, - cl::desc("Allow expected latency to exceed the critical path by N cycles " - "before attempting to balance ILP"), - cl::init(10U)); - // Experimental heuristics static cl::opt EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -1297,7 +1288,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { if (L > RemLatency) RemLatency = L; } - if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow + unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + if (RemLatency + ExpectedLatency >= CriticalPathLimit && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { Policy.ReduceLatency = true; DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 404634f..4191931 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let ILPWindow = 10; // Don't reschedule small blocks to hide + // latency. Minimum latency requirements are already + // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index c14407f..d99d085 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // +// ILPWindow=10 is an arbitrary threshold that approximates cycles of +// latency hidden by instruction buffers. The actual value is not very +// important but should be zero for inorder and nonzero for OOO processors. +// // The GenericModel contains no instruciton itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; let LoadLatency = 4; let HighLatency = 10; + let ILPWindow = 10; } include "X86ScheduleAtom.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 8710261..1e5f2d6 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel { // OperandCycles may be used for expected latency. 
let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. + let ILPWindow = 0; // Always try to hide expected latency. let Itineraries = AtomItineraries; } -- cgit v1.1 From 1ced208be9cab0f994c5df9000da36bc313b2507 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 9 Jan 2013 03:52:05 +0000 Subject: Last in the series of removing unnecessary '0' arguments for address space. Reordered the EmitULEB128IntValue arguments to make this easier. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171949 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 +++++----- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2 +- lib/MC/MCELFStreamer.cpp | 2 +- lib/MC/MCStreamer.cpp | 4 ++-- lib/Target/ARM/ARMAsmPrinter.cpp | 10 +++++----- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 2 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- lib/Target/XCore/XCoreAsmPrinter.cpp | 2 +- 11 files changed, 20 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 088622b..156acac 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -46,7 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo); + OutStreamer.EmitULEB128IntValue(Value, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index c193999..f58ec9b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -173,7 +173,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), MCSymbolRefExpr::Create(SecBegin, Context), Context); - Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index afe6901..93106a0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2160,7 +2160,7 @@ void DwarfUnits::emitStrings(const MCSection *StrSection, unsigned offset = 0; unsigned size = 4; for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - Asm->OutStreamer.EmitIntValue(offset, size, 0); + Asm->OutStreamer.EmitIntValue(offset, size); offset += Entries[i].second->getKeyLength() + 1; } } @@ -2201,8 +2201,8 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); + Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); + Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); DIVariable DV(Entry.Variable); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2288,7 +2288,7 @@ void DwarfDebug::emitDebugRanges() { I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { if (*I) - 
Asm->OutStreamer.EmitSymbolValue(const_cast(*I), Size, 0); + Asm->OutStreamer.EmitSymbolValue(const_cast(*I), Size); else Asm->OutStreamer.EmitIntValue(0, Size); } @@ -2374,7 +2374,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize(),0); + Asm->getDataLayout().getPointerSize()); } } diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index ec1c7a3..98177c0 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -145,7 +145,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { "Live root count "+Twine(LiveCount)+" >= 65536."); } - AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0); + AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); AP.EmitInt16(FrameSize); AP.EmitInt16(LiveCount); diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index b08fa41..d05fcca 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -469,7 +469,7 @@ void MCELFStreamer::FinishImpl() { } void MCELFStreamer::EmitTCEntry(const MCSymbol &S) { // Creates a R_PPC64_TOC relocation - MCObjectStreamer::EmitSymbolValue(&S, 8, 0); + MCObjectStreamer::EmitSymbolValue(&S, 8); } MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 7dffc3e..00ebde3 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -104,8 +104,8 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. -void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, - unsigned Padding) { +void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned Padding, + unsigned AddrSpace) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); encodeULEB128(Value, OSE, Padding); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index d66b299..ee2a228 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -192,11 +192,11 @@ namespace { // emit each field as its type (ULEB or String) for (unsigned int i=0; igetName()) + ".data"); -- cgit v1.1 From 9cceede447118852df76e340252387d1a2cce37d Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 9 Jan 2013 04:49:14 +0000 Subject: tblgen: Factor out common code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171951 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 34 +++++++++++++++++----------------- lib/TableGen/TGParser.h | 1 + 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index b6c8092..1561e2c 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -1861,6 +1861,17 @@ bool TGParser::ParseBody(Record *CurRec) { return false; } +/// \brief Apply the current let bindings to \a CurRec. +/// \returns true on error, false otherwise. +bool TGParser::ApplyLetStack(Record *CurRec) { + for (unsigned i = 0, e = LetStack.size(); i != e; ++i) + for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) + if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, + LetStack[i][j].Bits, LetStack[i][j].Value)) + return true; + return false; +} + /// ParseObjectBody - Parse the body of a def or class. 
This consists of an /// optional ClassList followed by a Body. CurRec is the current def or class /// that is being parsed. @@ -1891,12 +1902,8 @@ bool TGParser::ParseObjectBody(Record *CurRec) { } } - // Process any variables on the let stack. - for (unsigned i = 0, e = LetStack.size(); i != e; ++i) - for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) - if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, - LetStack[i][j].Bits, LetStack[i][j].Value)) - return true; + if (ApplyLetStack(CurRec)) + return true; return ParseBody(CurRec); } @@ -2355,11 +2362,8 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC, Record *DefProto, SMLoc DefmPrefixLoc) { // If the mdef is inside a 'let' expression, add to each def. - for (unsigned i = 0, e = LetStack.size(); i != e; ++i) - for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) - if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, - LetStack[i][j].Bits, LetStack[i][j].Value)) - return Error(DefmPrefixLoc, "when instantiating this defm"); + if (ApplyLetStack(CurRec)) + return Error(DefmPrefixLoc, "when instantiating this defm"); // Don't create a top level definition for defm inside multiclasses, // instead, only update the prototypes and bind the template args @@ -2483,12 +2487,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { if (AddSubClass(CurRec, SubClass)) return true; - // Process any variables on the let stack. - for (unsigned i = 0, e = LetStack.size(); i != e; ++i) - for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) - if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, - LetStack[i][j].Bits, LetStack[i][j].Value)) - return true; + if (ApplyLetStack(CurRec)) + return true; } if (Lex.getCode() != tgtok::comma) break; diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index 70fc9df..215cbfc 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -183,6 +183,7 @@ private: // Parser methods. Init *ParseObjectName(MultiClass *CurMultiClass); Record *ParseClassID(); MultiClass *ParseMultiClassID(); + bool ApplyLetStack(Record *CurRec); }; } // end namespace llvm -- cgit v1.1 From 13f8cf55d43980e73d6cbb8f4894607709daa311 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Jan 2013 05:14:33 +0000 Subject: Efficient lowering of vector sdiv when the divisor is a splatted power of two constant. PR 14848. The lowered sequence is based on the existing sequence the target-independent DAG Combiner creates for the scalar case. Patch by Zvi Rackover. 
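The lowering follows the scalar recipe the DAG combiner already uses: splat the sign bit, add a bias of 2^k - 1 for negative inputs, then arithmetic-shift right. A standalone scalar sketch of the per-lane math (an illustration, not code from the patch; it assumes 32-bit lanes, 1 <= k <= 31, and arithmetic right shift on signed values):

    #include <cstdint>

    // x / (1 << k) with round-toward-zero semantics, per vector lane.
    int32_t sdiv_pow2(int32_t x, unsigned k) {
      int32_t sign = x >> 31;                     // splat the sign bit
      uint32_t bias = uint32_t(sign) >> (32 - k); // x < 0 ? (1 << k) - 1 : 0
      return (x + int32_t(bias)) >> k;            // biased shift rounds to zero
    }

For a negative divisor -2^k the patch emits the same sequence and then subtracts the result from zero.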
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171953 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 50 ++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 1 + 2 files changed, 51 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4b00b46..f42884d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1047,6 +1047,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v4i32, Custom); } + setOperationAction(ISD::SDIV, MVT::v8i16, Custom); + setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { @@ -1111,6 +1113,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v16i16, Custom); setOperationAction(ISD::SRA, MVT::v32i8, Custom); + setOperationAction(ISD::SDIV, MVT::v16i16, Custom); + setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); setOperationAction(ISD::SETCC, MVT::v8i32, Custom); @@ -1166,6 +1170,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v8i32, Legal); setOperationAction(ISD::SRA, MVT::v8i32, Legal); + + setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -11377,6 +11383,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } +SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + EVT EltTy = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // Lower sdiv X, pow2-const. + BuildVectorSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return SDValue(); + + APInt SplatValue, SplatUndef; + unsigned MinSplatBits; + bool HasAnyUndefs; + if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs)) + return SDValue(); + + if ((SplatValue != 0) && + (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) { + unsigned lg2 = SplatValue.countTrailingZeros(); + // Splat the sign bit. + SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32); + SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG); + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32); + SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG); + SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL); + SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32); + SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. 
+ if (SplatValue.isNonNegative()) + return SRA; + + SmallVector V(NumElts, DAG.getConstant(0, EltTy)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts); + return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA); + } + return SDValue(); +} + SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -12033,6 +12082,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 16ce364..35b5abd 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -841,6 +841,7 @@ namespace llvm { SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From 699b8705563a62cbe2f56594144001a6c9639c2e Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 9 Jan 2013 05:28:12 +0000 Subject: tblgen: use an early return to reduce indentation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171954 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 1561e2c..860b80d 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -2368,24 +2368,24 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC, // Don't create a top level definition for defm inside multiclasses, // instead, only update the prototypes and bind the template args // with the new created definition. - if (CurMultiClass) { - for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); - i != e; ++i) - if (CurMultiClass->DefPrototypes[i]->getNameInit() - == CurRec->getNameInit()) - return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() + - "' already defined in this multiclass!"); - CurMultiClass->DefPrototypes.push_back(CurRec); - - // Copy the template arguments for the multiclass into the new def. - const std::vector &TA = - CurMultiClass->Rec.getTemplateArgs(); - - for (unsigned i = 0, e = TA.size(); i != e; ++i) { - const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]); - assert(RV && "Template arg doesn't exist?"); - CurRec->addValue(*RV); - } + if (!CurMultiClass) + return false; + for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); + i != e; ++i) + if (CurMultiClass->DefPrototypes[i]->getNameInit() + == CurRec->getNameInit()) + return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() + + "' already defined in this multiclass!"); + CurMultiClass->DefPrototypes.push_back(CurRec); + + // Copy the template arguments for the multiclass into the new def. 
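+    // (Illustrative aside: the prototype keeps its own copy of each template
+    // argument so that a later defm instantiation can bind concrete values to
+    // them when the def is finally materialized.)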
+ const std::vector &TA = + CurMultiClass->Rec.getTemplateArgs(); + + for (unsigned i = 0, e = TA.size(); i != e; ++i) { + const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]); + assert(RV && "Template arg doesn't exist?"); + CurRec->addValue(*RV); } return false; -- cgit v1.1 From 53208a91a0ff20a956357147472d75058c2d5cce Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 9 Jan 2013 09:26:23 +0000 Subject: Alter the hashing computation when inserting into the folding set. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171960 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index cdea350..a1f306c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -45,8 +45,7 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { // Otherwise, build a key to look up the existing attributes. LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - // FIXME: Don't look up ConstantInts here. - ID.AddPointer(ConstantInt::get(Type::getInt64Ty(Context), B.getBitMask())); + ID.AddInteger(B.getBitMask()); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); -- cgit v1.1 From 2c8cf4b404e549482f593f62f9e27e0bab4a8b3f Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 9 Jan 2013 13:18:15 +0000 Subject: Refactor to expose RTLIB calls to targets. fp128 is almost but not quite completely illegal as a type on AArch64. As a result it needs to have a register class (for argument passing mainly), but all operations need to be lowered to runtime calls. Currently there's no way for targets to do this (without duplicating code), as the relevant functions are hidden in SelectionDAG. This patch changes that. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171971 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 22 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 458 +++++++++------------- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 23 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 40 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 - lib/CodeGen/SelectionDAG/TargetLowering.cpp | 155 ++++++++ 6 files changed, 367 insertions(+), 337 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5eaf67e..db3abaf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1841,26 +1841,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } -static bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - SDValue &Chain, const TargetLowering &TLI) { - const Function *F = DAG.getMachineFunction().getFunction(); - - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); - if (AttrBuilder(CallerRetAttr) - .removeAttribute(Attribute::NoAlias).hasAttributes()) - return false; - - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerRetAttr.hasAttribute(Attribute::ZExt) || - CallerRetAttr.hasAttribute(Attribute::SExt)) - return false; - - // Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node, Chain); -} - // ExpandLibCall - Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -1891,7 +1871,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. SDValue TCChain = InChain; - bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); if (isTailCall) InChain = TCChain; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 92dc5a9..4859ad0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -152,23 +152,23 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -216,90 +216,90 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); 
- return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -307,35 +307,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, N->getDebugLoc()); + return 
TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -343,12 +343,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -356,7 +356,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -364,8 +364,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - N->getDebugLoc()); + return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -373,19 +373,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -393,80 +393,80 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - RTLIB::SIN_F80, - RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), 
GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -559,8 +559,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + return TLI.makeLibCall(DAG, LC, + TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); } @@ -607,92 +608,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. -void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl) { - SDValue LHSInt = GetSoftenedFloat(NewLHS); - SDValue RHSInt = GetSoftenedFloat(NewRHS); - EVT VT = NewLHS.getValueType(); - - assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); - - // Expand into one or more soft-fp libcall(s). - RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; - switch (CCCode) { - case ISD::SETEQ: - case ISD::SETOEQ: - LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; - break; - case ISD::SETNE: - case ISD::SETUNE: - LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; - break; - case ISD::SETGE: - case ISD::SETOGE: - LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; - break; - case ISD::SETLT: - case ISD::SETOLT: - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - break; - case ISD::SETLE: - case ISD::SETOLE: - LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; - break; - case ISD::SETGT: - case ISD::SETOGT: - LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; - break; - case ISD::SETUO: - LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; - break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; - break; - default: - LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; - switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; - break; - case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - break; - case ISD::SETULE: - LC2 = (VT == MVT::f32) ? 
RTLIB::OLE_F32 : RTLIB::OLE_F64; - break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; - break; - default: llvm_unreachable("Do not know how to soften this setcc!"); - } - } - - // Use the target specific return value for comparions lib calls. - EVT RetVT = TLI.getCmpLibcallReturnType(); - SDValue Ops[2] = { LHSInt, RHSInt }; - NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewRHS = DAG.getConstant(0, RetVT); - CCCode = TLI.getCmpLibcallCC(LC1); - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS, - NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); - NewRHS = SDValue(); - } -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -706,15 +621,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast(N->getOperand(1))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, we need to compare the result + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
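  // (A null NewRHS is how softenSetCCOperands signals that NewLHS already
  // holds the fully computed comparison result, which the check below relies
  // on.)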
if (NewRHS.getNode() == 0) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); @@ -733,7 +652,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -741,22 +660,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, we need to compare the result + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (NewRHS.getNode() == 0) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); @@ -773,9 +696,13 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(2))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, use it. + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, use it. 
if (NewRHS.getNode() == 0) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); @@ -947,13 +874,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1014,26 +941,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1111,13 +1038,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1193,7 +1120,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); GetPairElements(Hi, Lo, Hi); } @@ -1364,7 +1291,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1396,7 +1323,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = 
RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + false, dl); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5e33ef1..18748f5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1767,7 +1767,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -1777,7 +1778,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, @@ -1992,7 +1994,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -2054,7 +2057,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2138,7 +2141,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); return; } @@ -2221,7 +2224,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2361,7 +2364,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2381,7 +2384,7 @@ void 
DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2668,7 +2671,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc()); } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2846,7 +2849,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return MakeLibCall(LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6aea2d8..e26d165 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1020,50 +1020,20 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); if (NumOps == 0) { - return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); -} - -/// MakeLibCall - Generate a libcall taking the given operands as arguments and -/// returning a result of type RetVT. 
-SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) { - TargetLowering::ArgListTy Args; - Args.reserve(NumOps); - - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); - - Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), + &Ops[0], NumOps, isSigned, dl); } // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8c53ba3..724fdb9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -159,9 +159,6 @@ private: SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, bool isSigned, - DebugLoc dl); std::pair ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -433,9 +430,6 @@ private: SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); - void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); - //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 34f3bc9..d2da9b7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1012,6 +1012,161 @@ MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { return MVT::i32; // return the default value } +/// Check whether a given call node is in tail position within its function. If +/// so, it sets Chain to the input chain of the tail call. +bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); + if (AttrBuilder(CallerRetAttr) + .removeAttribute(Attribute::NoAlias).hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerRetAttr.hasAttribute(Attribute::ZExt) || + CallerRetAttr.hasAttribute(Attribute::SExt)) + return false; + + // Check if the only use is a function return node. 
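+  // (isUsedByReturnOnly is a target-overridable hook; per the comment above,
+  // a successful check also updates Chain to the input chain of the tail
+  // call.)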
+ return isUsedByReturnOnly(Node, Chain); +} + + +/// Generate a libcall taking the given operands as arguments and returning a +/// result of type RetVT. +SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) const { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair CallInfo = LowerCallTo(CLI); + + return CallInfo.first; +} + + +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : + (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : + (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? 
RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: llvm_unreachable("Do not know how to soften this setcc!"); + } + } + + // Use the target specific return value for comparions lib calls. + EVT RetVT = getCmpLibcallReturnType(); + SDValue Ops[2] = { NewLHS, NewRHS }; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewRHS = DAG.getConstant(0, RetVT); + CCCode = getCmpLibcallCC(LC1); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS, + NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + NewRHS = SDValue(); + } +} + /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. -- cgit v1.1 From a1db5de9e70dd8ffda57b1a4373915ea866b6f1d Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Wed, 9 Jan 2013 17:08:15 +0000 Subject: PowerPC: EH adjustments This patch adjusts r171506 to make all DWARF encodings pc-relative for PPC64. It also adds the R_PPC64_REL32 relocation handling in MCJIT (since the eh_frame will not generate PIC-relative relocation) and adds the emission of stubs created by the TTypeEncoding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171979 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 9 ++++++++- lib/MC/MCObjectFileInfo.cpp | 17 +++++++---------- lib/Target/PowerPC/PPCAsmPrinter.cpp | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 0a68f4e..1524b48 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -523,7 +523,7 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, case ELF::R_PPC64_ADDR32 : { int32_t Result = static_cast<int32_t>(Value + Addend); if (SignExtend32<32>(Result) != Result) - llvm_unreachable("Relocation R_PPC64_REL32 overflow"); + llvm_unreachable("Relocation R_PPC64_ADDR32 overflow"); writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24 : { @@ -534,6 +534,13 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, // Generates a 'bl
' instruction writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC)); } break; + case ELF::R_PPC64_REL32 : { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + int32_t delta = static_cast(Value - FinalAddress + Addend); + if (SignExtend32<32>(delta) != delta) + llvm_unreachable("Relocation R_PPC64_REL32 overflow"); + writeInt32BE(LocalAddress, delta); + } break; case ELF::R_PPC64_ADDR64 : writeInt64BE(LocalAddress, Value + Addend); break; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index a46f7be..a304584 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -257,16 +257,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } } else if (T.getArch() == Triple::ppc64) { - PersonalityEncoding = dwarf::DW_EH_PE_udata8; - PersonalityEncoding |= (RelocM == Reloc::PIC_) - ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel - : dwarf::DW_EH_PE_absptr; - unsigned PICFlag = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel - : dwarf::DW_EH_PE_absptr; - FDECFIEncoding = PICFlag | dwarf::DW_EH_PE_sdata4; - LSDAEncoding = PICFlag | dwarf::DW_EH_PE_udata8; - FDEEncoding = PICFlag | dwarf::DW_EH_PE_sdata4; - TTypeEncoding = PICFlag | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_udata8; + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; + FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_udata8; } // Solaris requires different flags for .eh_frame to seemingly every other diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4319894..839f918 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -768,6 +768,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } + MachineModuleInfoELF &MMIELF = + MMI->getObjFileInfo(); + + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + return AsmPrinter::doFinalization(M); } -- cgit v1.1 From 12cd49ae1d3c8f45f3e41b6cce681b667b99ef07 Mon Sep 17 00:00:00 2001 From: Sergei Larin Date: Wed, 9 Jan 2013 17:54:33 +0000 Subject: Fix a typo in MachineInstr::unbundleFromSucc() method. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171983 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 29d8866..a4d9813 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -897,7 +897,7 @@ void MachineInstr::unbundleFromSucc() { assert(isBundledWithSucc() && "MI isn't bundled with its successor"); clearFlag(BundledSucc); MachineBasicBlock::instr_iterator Succ = this; - --Succ; + ++Succ; assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->clearFlag(BundledPred); } -- cgit v1.1 From d9cc865787d673a8d1021d0b9659fd438feba845 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 Jan 2013 18:12:03 +0000 Subject: LICM: Hoist insertvalue/extractvalue out of loops. Fixes PR14854. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LICM.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index dc6bef7..f94cd2a 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -440,13 +440,12 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { } // Only these instructions are hoistable/sinkable. - bool HoistableKind = (isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || - isa(I)); - if (!HoistableKind) - return false; + if (!isa(I) && !isa(I) && !isa(I) && + !isa(I) && !isa(I) && + !isa(I) && !isa(I) && + !isa(I) && !isa(I) && + !isa(I)) + return false; return isSafeToExecuteUnconditionally(I); } -- cgit v1.1 From 25377c8c6dafd094f17833f2c37daff0b77a16fc Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 9 Jan 2013 18:28:16 +0000 Subject: Don't require BUNDLE headers in MachineInstr::getBundleSize(). It is possible to build MI bundles that don't begin with a BUNDLE header. Add support for such bundles, counting all instructions inside the bundle. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171985 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a4d9813..53dbf03 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -982,18 +982,13 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } -/// getBundleSize - Return the number of instructions inside the MI bundle. +/// Return the number of instructions inside the MI bundle, not counting the +/// header instruction. unsigned MachineInstr::getBundleSize() const { - assert(isBundle() && "Expecting a bundle"); - - const MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end(); + MachineBasicBlock::const_instr_iterator I = this; unsigned Size = 0; - while ((++I != E) && I->isInsideBundle()) { - ++Size; - } - assert(Size > 1 && "Malformed bundle"); - + while (I->isBundledWithSucc()) + ++Size, ++I; return Size; } -- cgit v1.1 From ebed123c5c2d10bb2aceb272f25644f685ebcd09 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 9 Jan 2013 18:35:09 +0000 Subject: Don't print bundle flags. 
The bundle flags are used by MachineBasicBlock::print(), they don't need to clutter up individual MachineInstrs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171986 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 53dbf03..92f6a7f 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1585,7 +1585,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } bool HaveSemi = false; - if (Flags) { + const unsigned PrintableFlags = FrameSetup; + if (Flags & PrintableFlags) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " flags: "; -- cgit v1.1 From 5581115075e688bdf44920f6b93dd42e94c3ccf6 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 9 Jan 2013 19:23:24 +0000 Subject: [ObjCARC Debug Messages] This is a squashed commit of 3x debug message commits ala echristo's suggestion. 1. Added debug messages when in OptimizeIndividualCalls we move calls into predecessors and then erase the original call. 2. Added debug messages when in the process of moving calls in ObjCARCOpt::MoveCalls we create new RR and delete old RR. 3. Added a debug message when we visit a specific retain instruction in ObjCARCOpt::PerformCodePlacement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171988 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index e6ec841..8eb9d19 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -2562,10 +2562,16 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { Op = new BitCastInst(Op, ParamTy, "", InsertPos); Clone->setArgOperand(0, Op); Clone->insertBefore(InsertPos); + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " + << *CInst << "\n" + " And inserting " + "clone at " << *InsertPos << "\n"); Worklist.push_back(std::make_pair(Clone, Incoming)); } } // Erase the original call. + DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); EraseInstruction(CInst); continue; } @@ -3226,6 +3232,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg, MDNode::get(M->getContext(), ArrayRef())); else Call->setTailCall(); + + DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call + << "\n" + " At insertion point: " << *InsertPt + << "\n"); } for (SmallPtrSet::const_iterator PI = RetainsToMove.ReverseInsertPts.begin(), @@ -3241,6 +3252,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Call->setDoesNotThrow(); if (ReleasesToMove.IsTailCallRelease) Call->setTailCall(); + + DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call + << "\n" + " At insertion point: " << *InsertPt + << "\n"); } // Delete the original retain and release calls. 
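// (Usage note, not part of the patch: these DEBUG(...) statements compile
// away in release builds; in an asserts-enabled build they print when opt
// runs with -debug, or with -debug-only=<tag> where <tag> is the file's
// DEBUG_TYPE, assumed here to be "objc-arc".)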
@@ -3250,6 +3266,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRetain = *AI; Retains.blot(OrigRetain); DeadInsts.push_back(OrigRetain); + DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain << + "\n"); } for (SmallPtrSet::const_iterator AI = ReleasesToMove.Calls.begin(), @@ -3257,6 +3275,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRelease = *AI; Releases.erase(OrigRelease); DeadInsts.push_back(OrigRelease); + DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease + << "\n"); } } @@ -3282,6 +3302,10 @@ ObjCARCOpt::PerformCodePlacement(DenseMap if (!V) continue; // blotted Instruction *Retain = cast(V); + + DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain + << "\n"); + Value *Arg = GetObjCArg(Retain); // If the object being released is in static or stack storage, we know it's -- cgit v1.1 From f48acd5ecd2616623f441f2922d8b4c637e3cd6c Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Wed, 9 Jan 2013 19:42:40 +0000 Subject: Move the internal PrintStackTrace function that is used for llvm::sys::PrintStackTraceOnErrorSignal(), into a new function llvm::sys::PrintStackTrace, so that it's available to clients for logging purposes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171989 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Signals.inc | 26 +++++++++++++++----------- lib/Support/Windows/Signals.inc | 4 ++++ 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 9e98af7..87162d6 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -254,7 +254,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { // // On glibc systems we have the 'backtrace' function, which works nicely, but // doesn't demangle symbols. -static void PrintStackTrace(void *) { +void llvm::sys::PrintStackTrace(FILE *FD) { #if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) static void* StackTrace[256]; // Use backtrace() to output a backtrace on Linux systems with glibc. 
@@ -278,26 +278,26 @@ static void PrintStackTrace(void *) { Dl_info dlinfo; dladdr(StackTrace[i], &dlinfo); - fprintf(stderr, "%-2d", i); + fprintf(FD, "%-2d", i); const char* name = strrchr(dlinfo.dli_fname, '/'); - if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname); - else fprintf(stderr, " %-*s", width, name+1); + if (name == NULL) fprintf(FD, " %-*s", width, dlinfo.dli_fname); + else fprintf(FD, " %-*s", width, name+1); - fprintf(stderr, " %#0*lx", + fprintf(FD, " %#0*lx", (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]); if (dlinfo.dli_sname != NULL) { int res; - fputc(' ', stderr); + fputc(' ', FD); char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res); - if (d == NULL) fputs(dlinfo.dli_sname, stderr); - else fputs(d, stderr); + if (d == NULL) fputs(dlinfo.dli_sname, FD); + else fputs(d, FD); free(d); - fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr); + fprintf(FD, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr); } - fputc('\n', stderr); + fputc('\n', FD); } #else backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO); @@ -305,10 +305,14 @@ static void PrintStackTrace(void *) { #endif } +static void PrintStackTraceSignalHandler(void *) { + PrintStackTrace(stderr); +} + /// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. void llvm::sys::PrintStackTraceOnErrorSignal() { - AddSignalHandler(PrintStackTrace, 0); + AddSignalHandler(PrintStackTraceSignalHandler, 0); #if defined(__APPLE__) // Environment variable to disable any kind of crash dialog. diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index a969753..3dd6660 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -295,6 +295,10 @@ void sys::PrintStackTraceOnErrorSignal() { LeaveCriticalSection(&CriticalSection); } +void llvm::sys::PrintStackTrace(FILE *) { + // FIXME: Implement. +} + void sys::SetInterruptFunction(void (*IF)()) { RegisterHandler(); -- cgit v1.1 From 78ec0255d9ab184af7799c14d93879e5f21b9007 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 9 Jan 2013 20:56:40 +0000 Subject: Fix a DAG combine bug: visitBRCOND() is transforming br(xor(x, y)) to br(x != y). It cached XOR's operands before calling visitXOR() but failed to update the operands when visitXOR changed the XOR node. rdar://12968664 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ff00d0d..359c4cf 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6735,18 +6735,24 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations.
SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode() && Tmp.getNode() != TheXor) { - DEBUG(dbgs() << "\nReplacing.8 "; - TheXor->dump(&DAG); - dbgs() << "\nWith: "; - Tmp.getNode()->dump(&DAG); - dbgs() << '\n'); - WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorkList(TheXor); - DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), - MVT::Other, Chain, Tmp, N2); + if (Tmp.getNode()) { + if (Tmp.getNode() != TheXor) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + + // visitXOR has changed XOR's operands. + Op0 = TheXor->getOperand(0); + Op1 = TheXor->getOperand(1); } } -- cgit v1.1 From 14925e6b885f8bd8cf448627386d412831f4bf1b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Jan 2013 22:29:00 +0000 Subject: ARM Cost model: Use the size of vector registers and widest vectorizable instruction to determine the max vectorization factor. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/TargetTransformInfo.cpp | 8 +++++ lib/CodeGen/BasicTargetTransformInfo.cpp | 5 +++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 10 ++++++ lib/Target/X86/X86TargetTransformInfo.cpp | 17 ++++++++++ lib/Transforms/Vectorize/LoopVectorize.cpp | 51 ++++++++++++++++++++++++++++-- 5 files changed, 88 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 02af2d3..3ef74eb 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return PrevTTI->getNumberOfRegisters(Vector); } +unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { + return PrevTTI->getRegisterBitWidth(Vector); +} + unsigned TargetTransformInfo::getMaximumUnrollFactor() const { return PrevTTI->getMaximumUnrollFactor(); } @@ -220,6 +224,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return 8; } + unsigned getRegisterBitWidth(bool Vector) const { + return 32; + } + unsigned getMaximumUnrollFactor() const { return 1; } diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 2f3ac9a..3892cc4 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -84,6 +84,7 @@ public: virtual unsigned getNumberOfRegisters(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getRegisterBitWidth(bool Vector) const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; @@ -183,6 +184,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { return 1; } +unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { + return 32; +} + unsigned BasicTTI::getMaximumUnrollFactor() const { return 1; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 634004a..404a6ff 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -94,6 +94,16 @@ public: return 16; } + 
unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + + return 32; + } + unsigned getMaximumUnrollFactor() const { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 6ab08cb..675c896 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -83,6 +83,7 @@ public: /// @{ virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, @@ -165,11 +166,27 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { } unsigned X86TTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasSSE1()) + return 0; + if (ST->is64Bit()) return 16; return 8; } +unsigned X86TTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAVX()) return 256; + if (ST->hasSSE1()) return 128; + return 0; + } + + if (ST->is64Bit()) + return 64; + return 32; + +} + unsigned X86TTI::getMaximumUnrollFactor() const { if (ST->isAtom()) return 1; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c29f416..cde4bb8 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -113,9 +113,6 @@ static const unsigned MaxLoopSizeThreshold = 32; /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; -/// This is the highest vector width that we try to generate. -static const unsigned MaxVectorSize = 8; - namespace { // Forward declarations. @@ -523,6 +520,10 @@ public: /// possible. unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF); + /// \returns The size (in bits) of the widest type in the code that + /// needs to be vectorized. We ignore values that remain scalar such as + /// 64 bit loop indices. + unsigned getWidestType(); /// \return The most profitable unroll factor. /// If UserUF is non-zero then this method finds the best unroll-factor @@ -2621,6 +2622,20 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch()); DEBUG(dbgs() << "LV: Found trip count:"<block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + BasicBlock *BB = *bb; + + // For each instruction in the loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + if (Legal->isUniformAfterVectorization(it)) + continue; + + Type *T = it->getType(); + + if (StoreInst *ST = dyn_cast(it)) + T = ST->getValueOperand()->getType(); + + // PHINodes and pointers are difficult to analyze, but we catch all other + // uses of the types in other instructions. 
+ if (isa(it) || T->isPointerTy() || T->isVoidTy()) + continue; + + MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); + } + } + + return MaxWidth; +} + unsigned LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, unsigned UserUF) { -- cgit v1.1 From 5573de8493ac11564fba02b7407176a98b45d7ce Mon Sep 17 00:00:00 2001 From: Joel Jones Date: Wed, 9 Jan 2013 22:34:16 +0000 Subject: Fix description of ARMOperand git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172011 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8eae642..e9bdc4a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -271,7 +271,7 @@ public: namespace { /// ARMOperand - Instances of this class represent a parsed ARM machine -/// instruction. +/// operand. class ARMOperand : public MCParsedAsmOperand { enum KindTy { k_CondCode, -- cgit v1.1 From e92800dd533988c46e58b0f4226df76a3bb1cb0c Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Wed, 9 Jan 2013 22:58:43 +0000 Subject: [Object][Archive] Use uint64_t instead of APInt. It is significantly faster. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172015 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index dafcb72..95eba62 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -48,9 +48,9 @@ struct ArchiveMemberHeader { } uint64_t getSize() const { - APInt ret; + uint64_t ret; StringRef(Size, sizeof(Size)).getAsInteger(10, ret); - return ret.getZExtValue(); + return ret; } }; } @@ -110,11 +110,11 @@ error_code Archive::Child::getName(StringRef &Result) const { } // It's a long name. // Get the offset. - APInt offset; + uint64_t offset; name.substr(1).getAsInteger(10, offset); const char *addr = Parent->StringTable->Data.begin() + sizeof(ArchiveMemberHeader) - + offset.getZExtValue(); + + offset; // Verify it. if (Parent->StringTable == Parent->end_children() || addr < (Parent->StringTable->Data.begin() @@ -133,9 +133,9 @@ error_code Archive::Child::getName(StringRef &Result) const { } return object_error::success; } else if (name.startswith("#1/")) { - APInt name_size; + uint64_t name_size; name.substr(3).getAsInteger(10, name_size); - Result = Data.substr(0, name_size.getZExtValue()); + Result = Data.substr(0, name_size); return object_error::success; } // It's a simple name. @@ -151,9 +151,9 @@ uint64_t Archive::Child::getSize() const { // Don't include attached name. 
StringRef name = ToHeader(Data.data())->getName(); if (name.startswith("#1/")) { - APInt name_size; + uint64_t name_size; name.substr(3).getAsInteger(10, name_size); - size -= name_size.getZExtValue(); + size -= name_size; } return size; } @@ -163,9 +163,9 @@ MemoryBuffer *Archive::Child::getBuffer() const { if (getName(name)) return NULL; int size = sizeof(ArchiveMemberHeader); if (name.startswith("#1/")) { - APInt name_size; + uint64_t name_size; name.substr(3).getAsInteger(10, name_size); - size += name_size.getZExtValue(); + size += name_size; } return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()), name, -- cgit v1.1 From 1db9b6957c2565a2322206bd5907530895f1c7ac Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 9 Jan 2013 23:36:50 +0000 Subject: Revert s/Raw/getBitMask/g name change. This is possibly causing LTO test hangings. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172020 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- lib/IR/AttributeImpl.h | 4 ++-- lib/IR/Attributes.cpp | 37 +++++++++++++++++++----------------- lib/IR/Core.cpp | 4 ++-- 4 files changed, 25 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 59cda22..3bd64a9 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -477,7 +477,7 @@ bool BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Attribute ReconstitutedAttr = Attribute::decodeLLVMAttributesForBitcode(Context, Record[i+1]); - Record[i+1] = ReconstitutedAttr.getBitMask(); + Record[i+1] = ReconstitutedAttr.Raw(); } for (unsigned i = 0, e = Record.size(); i != e; i += 2) { diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 1164d68..10f30e7 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -57,7 +57,7 @@ public: bool operator==(StringRef Kind) const; bool operator!=(StringRef Kind) const; - uint64_t getBitMask() const; // FIXME: Remove. + uint64_t Raw() const; // FIXME: Remove. static uint64_t getAttrMask(Attribute::AttrKind Val); @@ -93,7 +93,7 @@ public: ArrayRef AttrList){ for (unsigned i = 0, e = AttrList.size(); i != e; ++i) { ID.AddInteger(AttrList[i].Index); - ID.AddInteger(AttrList[i].Attrs.getBitMask()); + ID.AddInteger(AttrList[i].Attrs.Raw()); } } }; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index a1f306c..5024a63 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -45,7 +45,7 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { // Otherwise, build a key to look up the existing attributes. LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ID.AddInteger(B.getBitMask()); + ID.AddInteger(B.Raw()); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -53,7 +53,7 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributeImpl(Context, B.getBitMask()); + PA = new AttributeImpl(Context, B.Raw()); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -103,8 +103,8 @@ bool Attribute::operator!=(AttrKind K) const { return !(*this == K); } -uint64_t Attribute::getBitMask() const { - return pImpl ? pImpl->getBitMask() : 0; +uint64_t Attribute::Raw() const { + return pImpl ? 
pImpl->Raw() : 0; } Attribute Attribute::typeIncompatible(Type *Ty) { @@ -139,10 +139,10 @@ uint64_t Attribute::encodeLLVMAttributesForBitcode(Attribute Attrs) { // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit // log2 encoded value. Shift the bits above the alignment up by 11 bits. - uint64_t EncodedAttrs = Attrs.getBitMask() & 0xffff; + uint64_t EncodedAttrs = Attrs.Raw() & 0xffff; if (Attrs.hasAttribute(Attribute::Alignment)) EncodedAttrs |= Attrs.getAlignment() << 16; - EncodedAttrs |= (Attrs.getBitMask() & (0xffffULL << 21)) << 11; + EncodedAttrs |= (Attrs.Raw() & (0xffffULL << 21)) << 11; return EncodedAttrs; } @@ -320,7 +320,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { } AttrBuilder &AttrBuilder::addAttributes(const Attribute &A) { - uint64_t Mask = A.getBitMask(); + uint64_t Mask = A.Raw(); for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { @@ -338,7 +338,7 @@ AttrBuilder &AttrBuilder::addAttributes(const Attribute &A) { } AttrBuilder &AttrBuilder::removeAttributes(const Attribute &A){ - uint64_t Mask = A.getBitMask(); + uint64_t Mask = A.Raw(); for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { @@ -364,14 +364,14 @@ bool AttrBuilder::hasAttributes() const { } bool AttrBuilder::hasAttributes(const Attribute &A) const { - return getBitMask() & A.getBitMask(); + return Raw() & A.Raw(); } bool AttrBuilder::hasAlignmentAttr() const { return Alignment != 0; } -uint64_t AttrBuilder::getBitMask() const { +uint64_t AttrBuilder::Raw() const { uint64_t Mask = 0; for (DenseSet::const_iterator I = Attrs.begin(), @@ -438,7 +438,7 @@ bool AttributeImpl::operator!=(StringRef Kind) const { return !(*this == Kind); } -uint64_t AttributeImpl::getBitMask() const { +uint64_t AttributeImpl::Raw() const { // FIXME: Remove this. return cast(Data)->getZExtValue(); } @@ -485,15 +485,15 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { } bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { - return (getBitMask() & getAttrMask(A)) != 0; + return (Raw() & getAttrMask(A)) != 0; } bool AttributeImpl::hasAttributes() const { - return getBitMask() != 0; + return Raw() != 0; } uint64_t AttributeImpl::getAlignment() const { - return getBitMask() & getAttrMask(Attribute::Alignment); + return Raw() & getAttrMask(Attribute::Alignment); } void AttributeImpl::setAlignment(unsigned Align) { @@ -501,7 +501,7 @@ void AttributeImpl::setAlignment(unsigned Align) { } uint64_t AttributeImpl::getStackAlignment() const { - return getBitMask() & getAttrMask(Attribute::StackAlignment); + return Raw() & getAttrMask(Attribute::StackAlignment); } void AttributeImpl::setStackAlignment(unsigned Align) { @@ -511,9 +511,12 @@ void AttributeImpl::setStackAlignment(unsigned Align) { void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, ArrayRef Vals) { ID.AddInteger(cast(Data)->getZExtValue()); +#if 0 + // FIXME: Not yet supported. for (ArrayRef::iterator I = Vals.begin(), E = Vals.end(); I != E; ++I) ID.AddPointer(*I); +#endif } //===----------------------------------------------------------------------===// @@ -611,9 +614,9 @@ unsigned AttributeSet::getStackAlignment(unsigned Index) const { return getAttributes(Index).getStackAlignment(); } -uint64_t AttributeSet::getBitMask(unsigned Index) const { +uint64_t AttributeSet::Raw(unsigned Index) const { // FIXME: Remove this. 
- return getAttributes(Index).getBitMask(); + return getAttributes(Index).Raw(); } /// getAttributes - The attributes for the specified index are returned. diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 2024ac9..12cb971 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1401,7 +1401,7 @@ void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) { Function *Func = unwrap(Fn); const AttributeSet PAL = Func->getAttributes(); - return (LLVMAttribute)PAL.getBitMask(AttributeSet::FunctionIndex); + return (LLVMAttribute)PAL.Raw(AttributeSet::FunctionIndex); } /*--.. Operations on parameters ............................................--*/ @@ -1477,7 +1477,7 @@ void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) { LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) { Argument *A = unwrap(Arg); return (LLVMAttribute)A->getParent()->getAttributes(). - getBitMask(A->getArgNo()+1); + Raw(A->getArgNo()+1); } -- cgit v1.1 From bf82b07f5f2ff1ebd3fb607a9f0d31d2b5ada947 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 10 Jan 2013 00:07:38 +0000 Subject: [Object][Archive] Apparently StringRef::getAsInteger for APInt accepts spaces. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172022 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 95eba62..0d12be8 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -49,7 +49,7 @@ struct ArchiveMemberHeader { uint64_t getSize() const { uint64_t ret; - StringRef(Size, sizeof(Size)).getAsInteger(10, ret); + StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, ret); return ret; } }; @@ -110,7 +110,7 @@ error_code Archive::Child::getName(StringRef &Result) const { } // It's a long name. // Get the offset. - uint64_t offset; + std::size_t offset; name.substr(1).getAsInteger(10, offset); const char *addr = Parent->StringTable->Data.begin() + sizeof(ArchiveMemberHeader) @@ -218,6 +218,10 @@ Archive::Archive(MemoryBuffer *source, error_code &ec) SymbolTable = i; StringTable = e; if (i != e) ++i; + if (i == e) { + ec = object_error::parse_failed; + return; + } if ((ec = i->getName(name))) return; if (name[0] != '/') { -- cgit v1.1 From 7932c41884f182ae44a3feacc8a6a462e9097ca1 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 10 Jan 2013 01:05:34 +0000 Subject: [Object][Archive] Fix name handling with bsd style long names. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172026 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 0d12be8..e143338 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -49,7 +49,8 @@ struct ArchiveMemberHeader { uint64_t getSize() const { uint64_t ret; - StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, ret); + if (StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, ret)) + llvm_unreachable("Size is not an integer."); return ret; } }; @@ -111,7 +112,8 @@ error_code Archive::Child::getName(StringRef &Result) const { // It's a long name. // Get the offset. 
std::size_t offset; - name.substr(1).getAsInteger(10, offset); + if (name.substr(1).rtrim(" ").getAsInteger(10, offset)) + llvm_unreachable("Long name offset is not an integer"); const char *addr = Parent->StringTable->Data.begin() + sizeof(ArchiveMemberHeader) + offset; @@ -134,8 +136,9 @@ error_code Archive::Child::getName(StringRef &Result) const { return object_error::success; } else if (name.startswith("#1/")) { uint64_t name_size; - name.substr(3).getAsInteger(10, name_size); - Result = Data.substr(0, name_size); + if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) + llvm_unreachable("Long name length is not an integer"); + Result = Data.substr(sizeof(ArchiveMemberHeader), name_size); return object_error::success; } // It's a simple name. @@ -152,21 +155,24 @@ uint64_t Archive::Child::getSize() const { StringRef name = ToHeader(Data.data())->getName(); if (name.startswith("#1/")) { uint64_t name_size; - name.substr(3).getAsInteger(10, name_size); + if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) + llvm_unreachable("Long name length is not an integer"); size -= name_size; } return size; } MemoryBuffer *Archive::Child::getBuffer() const { - StringRef name; - if (getName(name)) return NULL; + StringRef name = ToHeader(Data.data())->getName(); int size = sizeof(ArchiveMemberHeader); if (name.startswith("#1/")) { uint64_t name_size; - name.substr(3).getAsInteger(10, name_size); + if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) + llvm_unreachable("Long name length is not an integer"); size += name_size; } + if (getName(name)) + return 0; return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()), name, false); -- cgit v1.1 From 86441169da23959c81d8648c3dfdc7a0bb8d2225 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Thu, 10 Jan 2013 01:10:10 +0000 Subject: Stack Alignment: throw error if we can't satisfy the minimal alignment requirement when creating stack objects in MachineFrameInfo. Add CreateStackObjectWithMinAlign to throw error when the minimal alignment can't be achieved and to clamp the alignment when the preferred alignment can't be achieved. Same is true for CreateVariableSizedObject. Will not emit error in CreateSpillStackObject or CreateStackObject. As long as callers of CreateStackObject do not assume the object will be aligned at the requested alignment, we should not have a miscompile since later optimizations which look at the object's alignment will have the correct information. rdar://12713765 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172027 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineFunction.cpp | 48 ++++++++++++++--------- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 3 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +- 3 files changed, 34 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 9647e83..3d7d20d 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -473,24 +473,32 @@ void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { } /// clampStackAlignment - Clamp the alignment if requested and emit a warning.
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign - << " when stack realignment is off" << '\n'); +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned PrefAlign, + unsigned MinAlign, unsigned StackAlign, + const AllocaInst *Alloca = 0) { + if (!ShouldClamp || PrefAlign <= StackAlign) + return PrefAlign; + if (Alloca && MinAlign > StackAlign) + Alloca->getParent()->getContext().emitError(Alloca, + "Requested Minimal Alignment exceeds the Stack Alignment!"); + else + assert(MinAlign <= StackAlign && + "Requested Minimal Alignment exceeds the Stack Alignment!"); return StackAlign; } -/// CreateStackObject - Create a new statically sized stack object, returning -/// a nonnegative identifier to represent it. +/// CreateStackObjectWithMinAlign - Create a new statically sized stack +/// object, returning a nonnegative identifier to represent it. This function +/// takes a preferred alignment and a minimal alignment. /// -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { +int MachineFrameInfo::CreateStackObjectWithMinAlign(uint64_t Size, + unsigned PrefAlignment, unsigned MinAlignment, + bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + unsigned Alignment = clampStackAlignment( + !TFI.isStackRealignable() || !RealignOption, + PrefAlignment, MinAlignment, + TFI.getStackAlignment(), Alloca); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -506,7 +514,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment, 0, + TFI.getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -518,10 +527,13 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// variable sized object is created, whether or not the index returned is /// actually used. 
/// -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { +int MachineFrameInfo::CreateVariableSizedObject(unsigned PrefAlignment, + unsigned MinAlignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + unsigned Alignment = clampStackAlignment( + !TFI.isStackRealignable() || !RealignOption, + PrefAlignment, MinAlignment, + TFI.getStackAlignment(), Alloca); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -542,7 +554,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, TFI.getStackAlignment()); + Align, 0, TFI.getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b46edad..229c50b 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -95,7 +95,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { (TySize >= 8 && isa(Ty) && cast(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MF->getFrameInfo()->CreateStackObjectWithMinAlign(TySize, Align, + AI->getAlignment(), false, MayNeedSP, AI); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8c22db3..ee98b00 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3259,7 +3259,8 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, + I.getAlignment(), &I); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { -- cgit v1.1 From b11f05043465bceae4853a3bd2c01d7d664cc5e3 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 10 Jan 2013 01:29:42 +0000 Subject: Support headerless bundles in MachineInstr::hasProperty(). This function can still work without a BUNDLE header instruction. 
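To make the traversal concrete, here is a minimal standalone sketch of the pattern the patch below uses: walk forward while the bundled-with-successor flag is set, aggregating a per-instruction property for Any/All queries. The Instr struct and flag bits are hypothetical stand-ins, not LLVM's MachineInstr, and the sketch ignores the special-casing of the BUNDLE header that the real code keeps for AllInBundle queries:

  #include <cassert>
  #include <cstddef>
  #include <vector>

  struct Instr {
    unsigned Flags;       // per-instruction property bits
    bool BundledWithSucc; // true if the next instruction is in the same bundle
  };

  enum QueryType { AnyInBundle, AllInBundle };

  // Walk forward from 'I' (which need not be a BUNDLE header) while the
  // bundled-with-successor flag is set, aggregating the property in 'Mask'.
  static bool hasPropertyInBundle(const std::vector<Instr> &MBB, std::size_t I,
                                  unsigned Mask, QueryType Type) {
    for (;; ++I) {
      assert(I < MBB.size() && "bundle ran off the end of the block");
      bool Has = (MBB[I].Flags & Mask) != 0;
      if (Type == AnyInBundle && Has)
        return true;
      if (Type == AllInBundle && !Has)
        return false;
      if (!MBB[I].BundledWithSucc) // last instruction of the bundle
        return Type == AllInBundle;
    }
  }

  int main() {
    // Two-instruction bundle: only the first has bit 0x1 set.
    std::vector<Instr> MBB = {{0x1, true}, {0x0, false}};
    assert(hasPropertyInBundle(MBB, 0, 0x1, AnyInBundle));
    assert(!hasPropertyInBundle(MBB, 0, 0x1, AllInBundle));
  }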
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172029 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 92f6a7f..cdf46b6 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -752,20 +752,18 @@ void MachineInstr::addMemOperand(MachineFunction &MF, } bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { - const MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::const_instr_iterator MII = *this; ++MII; - while (MII != MBB->end() && MII->isInsideBundle()) { + for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; } else { - if (Type == AllInBundle) + if (Type == AllInBundle && !MII->isBundle()) return false; } - ++MII; + // This was the last instruction in the bundle. + if (!MII->isBundledWithSucc()) + return Type == AllInBundle; } - - return Type == AllInBundle; } bool MachineInstr::isIdenticalTo(const MachineInstr *Other, -- cgit v1.1 From 06c7008e30d3e278f2d779135ff2ce50bfc643fc Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Thu, 10 Jan 2013 01:58:46 +0000 Subject: Fix a race condition in llvm::sys::path::unique_file: when we end up failing to create the unique file because the path doesn't exist, don't fail if someone else manages to create the path before we do. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172032 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/PathV2.inc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 25712a8..741f44a 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -421,11 +421,12 @@ retry_random_path: rety_open_create: int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode); if (RandomFD == -1) { + int SavedErrno = errno; // If the file existed, try again, otherwise, error. - if (errno == errc::file_exists) + if (SavedErrno == errc::file_exists) goto retry_random_path; // If path prefix doesn't exist, try to create it. - if (errno == errc::no_such_file_or_directory && + if (SavedErrno == errc::no_such_file_or_directory && !exists(path::parent_path(RandomPath))) { StringRef p(RandomPath); SmallString<64> dir_to_create; @@ -440,13 +441,15 @@ rety_open_create: (*i)[1] == '/' && (*i)[2] != '/') return make_error_code(errc::no_such_file_or_directory); - if (::mkdir(dir_to_create.c_str(), 0700) == -1) + if (::mkdir(dir_to_create.c_str(), 0700) == -1 && + errno != errc::file_exists) return error_code(errno, system_category()); } } goto rety_open_create; } - return error_code(errno, system_category()); + + return error_code(SavedErrno, system_category()); } // Make the path absolute. -- cgit v1.1 From 69a2d6f55afb2bc42bc19e754bcebee39ecdb8bc Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Thu, 10 Jan 2013 02:01:35 +0000 Subject: Fix a race condition in the lock-file manager: once the lock file is gone, check for the actual file we care about. 
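The shape of the resulting two-phase wait, as a small self-contained sketch written against std::filesystem rather than LLVM's sys::fs (the names and timings here are illustrative only; the real waitForUnlock polls in short intervals with a timeout of about an hour): first poll until the lock file disappears, then poll for the file it was protecting.

  #include <chrono>
  #include <filesystem>
  #include <thread>

  static bool waitForUnlock(const std::filesystem::path &File,
                            const std::filesystem::path &LockFile,
                            std::chrono::seconds Timeout) {
    namespace fs = std::filesystem;
    const auto Deadline = std::chrono::steady_clock::now() + Timeout;
    bool LockFileGone = false;
    while (std::chrono::steady_clock::now() < Deadline) {
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
      // Phase 1: while the lock file exists, its owner is still working.
      if (!LockFileGone && !fs::exists(LockFile))
        LockFileGone = true;
      // Phase 2: the lock is gone; now wait for the file it protected.
      if (LockFileGone && fs::exists(File))
        return true;
    }
    return false;
  }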
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172033 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/LockFileManager.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 075d8a5..31eec75 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -64,6 +64,7 @@ bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { LockFileManager::LockFileManager(StringRef FileName) { + this->FileName = FileName; LockFileName = FileName; LockFileName += ".lock"; @@ -175,6 +176,7 @@ void LockFileManager::waitForUnlock() { #endif // Don't wait more than an hour for the file to appear. const unsigned MaxSeconds = 3600; + bool LockFileGone = false; do { // Sleep for the designated interval, to allow the owning process time to // finish up and remove the lock file. @@ -185,10 +187,18 @@ void LockFileManager::waitForUnlock() { #else nanosleep(&Interval, NULL); #endif - // If the file no longer exists, we're done. + // If the lock file no longer exists, wait for the actual file. bool Exists = false; - if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) - return; + if (!LockFileGone) { + if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) { + LockFileGone = true; + Exists = false; + } + } + if (LockFileGone) { + if (!sys::fs::exists(FileName.str(), Exists) && Exists) + return; + } if (!processStillExecuting((*Owner).first, (*Owner).second)) return; -- cgit v1.1 From 5dc300107b0f31ef876f2bf93e9893f6aa8204a2 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 10 Jan 2013 02:03:50 +0000 Subject: [ObjCARC Debug Message] Added debug message when we convert an autorelease into an autoreleaseRV. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 8eb9d19..34700eb 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -3749,7 +3749,13 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { // Convert the autorelease to an autoreleaseRV, since it's // returning the value. if (AutoreleaseClass == IC_Autorelease) { + DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease " + "=> autoreleaseRV since it's returning a value.\n" + " In: " << *Autorelease + << "\n"); Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); + DEBUG(dbgs() << " Out: " << *Autorelease + << "\n"); AutoreleaseClass = IC_AutoreleaseRV; } -- cgit v1.1 From 1d505a33f9cb77a2adb644b85136e7be64a186d9 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 10 Jan 2013 10:31:11 +0000 Subject: Fix TryToShrinkGlobalToBoolean in GlobalOpt, so that it does not discard address spaces. 
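The DEBUG(dbgs() << ...) statements this series adds cost nothing in release builds. A simplified sketch of the gating pattern (LLVM's real macro in Support/Debug.h additionally supports per-pass -debug-only filtering; the message text below is a placeholder):

  #include <cstdio>

  bool DebugFlag = false; // LLVM sets this from the -debug command-line option

  #ifndef NDEBUG
  #define DEBUG(X) do { if (DebugFlag) { X; } } while (false)
  #else
  #define DEBUG(X) do { } while (false)
  #endif

  int main() {
    DebugFlag = true;
    DEBUG(std::fprintf(stderr, "ObjCARCOpt: Visiting: <instruction>\n"));
  }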
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172051 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index abd37c2..ff2964f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1825,7 +1825,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", - GV->getThreadLocalMode()); + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); -- cgit v1.1 From 2b8f6ae6b791099ceff8ad87bcca7f165655c5c7 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 10 Jan 2013 10:49:36 +0000 Subject: Fix a copy/paste error in the IR Linker, casting an ArrayType instead of a VectorType. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172054 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index e973919..41ec114 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -180,7 +180,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { if (DATy->getNumElements() != cast(SrcTy)->getNumElements()) return false; } else if (VectorType *DVTy = dyn_cast(DstTy)) { - if (DVTy->getNumElements() != cast(SrcTy)->getNumElements()) + if (DVTy->getNumElements() != cast(SrcTy)->getNumElements()) return false; } -- cgit v1.1 From c560bf638b74eb48347a7b945b90aa89ffcc1620 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 10 Jan 2013 17:34:39 +0000 Subject: LoopVectorizer: Fix a bug in the vectorization of BinaryOperators. The BinaryOperator can be folded to an Undef, and we don't want to set NSW flags to undef vals. PR14878 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172079 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index cde4bb8..4803a9d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1682,13 +1682,13 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, for (unsigned Part = 0; Part < UF; ++Part) { Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); - // Update the NSW, NUW and Exact flags. - BinaryOperator *VecOp = cast(V); - if (isa(BinOp)) { + // Update the NSW, NUW and Exact flags. Notice: V can be an Undef. + BinaryOperator *VecOp = dyn_cast(V); + if (VecOp && isa(BinOp)) { VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); } - if (isa(VecOp)) + if (VecOp && isa(VecOp)) VecOp->setIsExact(BinOp->isExact()); Entry[Part] = V; -- cgit v1.1 From 4aebce83212d7271454c8767085645fe11054b44 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 10 Jan 2013 18:42:44 +0000 Subject: Allow hasProperty() to be called on bundle-internal instructions. 
When calling hasProperty() on an instruction inside a bundle, it should always behave as if IgnoreBundle was passed, and just return properties for the current instruction. Only attempt to aggregate bundle properties whan asked about the bundle header. The assertion fires on existing ARM test cases without this fix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172082 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index cdf46b6..df82a17 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -752,6 +752,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF, } bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { + assert(!isBundledWithPred() && "Must be called on bundle header"); for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) -- cgit v1.1 From d122009e57217bd574703c46dd14b1a9235ed0b7 Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Thu, 10 Jan 2013 18:50:05 +0000 Subject: TableGen: record anonymous instantiations of classes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172084 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 860b80d..8bc28f0 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -383,7 +383,7 @@ static std::string GetNewAnonymousName() { } /// ParseObjectName - If an object name is specified, return it. Otherwise, -/// return an anonymous name. +/// return 0. /// ObjectName ::= Value [ '#' Value ]* /// ObjectName ::= /*empty*/ /// @@ -395,7 +395,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { // These are all of the tokens that can begin an object body. // Some of these can also begin values but we disallow those cases // because they are unlikely to be useful. - return StringInit::get(GetNewAnonymousName()); + return 0; default: break; } @@ -1204,7 +1204,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, static unsigned AnonCounter = 0; Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++), NameLoc, - Records); + Records, + /*IsAnonymous=*/true); SubClassReference SCRef; SCRef.RefLoc = NameLoc; SCRef.Rec = Class; @@ -1919,7 +1920,13 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { Lex.Lex(); // Eat the 'def' token. // Parse ObjectName and make a record for it. - Record *CurRec = new Record(ParseObjectName(CurMultiClass), DefLoc, Records); + Record *CurRec; + Init *Name = ParseObjectName(CurMultiClass); + if (Name) + CurRec = new Record(Name, DefLoc, Records); + else + CurRec = new Record(GetNewAnonymousName(), DefLoc, Records, + /*IsAnonymous=*/true); if (!CurMultiClass && Loops.empty()) { // Top-level def definition. @@ -2248,8 +2255,11 @@ InstantiateMulticlassDef(MultiClass &MC, // name, substitute the prefix for #NAME#. Otherwise, use the defm name // as a prefix. - if (DefmPrefix == 0) + bool IsAnonymous = false; + if (DefmPrefix == 0) { DefmPrefix = StringInit::get(GetNewAnonymousName()); + IsAnonymous = true; + } Init *DefName = DefProto->getNameInit(); @@ -2268,7 +2278,7 @@ InstantiateMulticlassDef(MultiClass &MC, // Make a trail of SMLocs from the multiclass instantiations. 
SmallVector Locs(1, DefmPrefixLoc); Locs.append(DefProto->getLoc().begin(), DefProto->getLoc().end()); - Record *CurRec = new Record(DefName, Locs, Records); + Record *CurRec = new Record(DefName, Locs, Records, IsAnonymous); SubClassReference Ref; Ref.RefLoc = DefmPrefixLoc; -- cgit v1.1 From b50df4a3df6db2ace3c011267934d3d10bdcc8db Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Thu, 10 Jan 2013 18:50:11 +0000 Subject: TableGen: Keep track of superclass reference ranges. def foo : bar; ~~~ This allows us to produce more precise diagnostics about a certain superclass, and even provide fixits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172085 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGParser.cpp | 78 +++++++++++++++++++++++++++-------------------- lib/TableGen/TGParser.h | 2 +- 2 files changed, 46 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 8bc28f0..8ee3a7b 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -26,7 +26,7 @@ using namespace llvm; namespace llvm { struct SubClassReference { - SMLoc RefLoc; + SMRange RefRange; Record *Rec; std::vector TemplateArgs; SubClassReference() : Rec(0) {} @@ -35,7 +35,7 @@ struct SubClassReference { }; struct SubMultiClassReference { - SMLoc RefLoc; + SMRange RefRange; MultiClass *MC; std::vector TemplateArgs; SubMultiClassReference() : MC(0) {} @@ -150,22 +150,23 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { // Add all of the values in the subclass into the current class. const std::vector &Vals = SC->getValues(); for (unsigned i = 0, e = Vals.size(); i != e; ++i) - if (AddValue(CurRec, SubClass.RefLoc, Vals[i])) + if (AddValue(CurRec, SubClass.RefRange.Start, Vals[i])) return true; const std::vector &TArgs = SC->getTemplateArgs(); // Ensure that an appropriate number of template arguments are specified. if (TArgs.size() < SubClass.TemplateArgs.size()) - return Error(SubClass.RefLoc, "More template args specified than expected"); + return Error(SubClass.RefRange.Start, + "More template args specified than expected"); // Loop over all of the template arguments, setting them to the specified // value or leaving them as the default if necessary. for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { if (i < SubClass.TemplateArgs.size()) { // If a value is specified for this template arg, set it now. - if (SetValue(CurRec, SubClass.RefLoc, TArgs[i], std::vector(), - SubClass.TemplateArgs[i])) + if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i], + std::vector(), SubClass.TemplateArgs[i])) return true; // Resolve it next. @@ -175,7 +176,8 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { CurRec->removeValue(TArgs[i]); } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) { - return Error(SubClass.RefLoc,"Value not specified for template argument #" + return Error(SubClass.RefRange.Start, + "Value not specified for template argument #" + utostr(i) + " (" + TArgs[i]->getAsUnquotedString() + ") of subclass '" + SC->getNameInitAsString() + "'!"); } @@ -184,17 +186,18 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { // Since everything went well, we can now set the "superclass" list for the // current record. 
const std::vector &SCs = SC->getSuperClasses(); + ArrayRef SCRanges = SC->getSuperClassRanges(); for (unsigned i = 0, e = SCs.size(); i != e; ++i) { if (CurRec->isSubClassOf(SCs[i])) - return Error(SubClass.RefLoc, + return Error(SubClass.RefRange.Start, "Already subclass of '" + SCs[i]->getName() + "'!\n"); - CurRec->addSuperClass(SCs[i]); + CurRec->addSuperClass(SCs[i], SCRanges[i]); } if (CurRec->isSubClassOf(SC)) - return Error(SubClass.RefLoc, + return Error(SubClass.RefRange.Start, "Already subclass of '" + SC->getName() + "'!\n"); - CurRec->addSuperClass(SC); + CurRec->addSuperClass(SC, SubClass.RefRange); return false; } @@ -211,7 +214,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, // Add all of the values in the subclass into the current class. const std::vector &SMCVals = SMC->Rec.getValues(); for (unsigned i = 0, e = SMCVals.size(); i != e; ++i) - if (AddValue(CurRec, SubMultiClass.RefLoc, SMCVals[i])) + if (AddValue(CurRec, SubMultiClass.RefRange.Start, SMCVals[i])) return true; int newDefStart = CurMC->DefPrototypes.size(); @@ -226,7 +229,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, // Add all of the values in the superclass into the current def. for (unsigned i = 0, e = MCVals.size(); i != e; ++i) - if (AddValue(NewDef, SubMultiClass.RefLoc, MCVals[i])) + if (AddValue(NewDef, SubMultiClass.RefRange.Start, MCVals[i])) return true; CurMC->DefPrototypes.push_back(NewDef); @@ -237,7 +240,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, // Ensure that an appropriate number of template arguments are // specified. if (SMCTArgs.size() < SubMultiClass.TemplateArgs.size()) - return Error(SubMultiClass.RefLoc, + return Error(SubMultiClass.RefRange.Start, "More template args specified than expected"); // Loop over all of the template arguments, setting them to the specified @@ -246,7 +249,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, if (i < SubMultiClass.TemplateArgs.size()) { // If a value is specified for this template arg, set it in the // superclass now. - if (SetValue(CurRec, SubMultiClass.RefLoc, SMCTArgs[i], + if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i], std::vector(), SubMultiClass.TemplateArgs[i])) return true; @@ -266,7 +269,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, ++j) { Record *Def = *j; - if (SetValue(Def, SubMultiClass.RefLoc, SMCTArgs[i], + if (SetValue(Def, SubMultiClass.RefRange.Start, SMCTArgs[i], std::vector(), SubMultiClass.TemplateArgs[i])) return true; @@ -278,7 +281,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, Def->removeValue(SMCTArgs[i]); } } else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) { - return Error(SubMultiClass.RefLoc, + return Error(SubMultiClass.RefRange.Start, "Value not specified for template argument #" + utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString() + ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!"); @@ -464,7 +467,7 @@ MultiClass *TGParser::ParseMultiClassID() { SubClassReference TGParser:: ParseSubClassReference(Record *CurRec, bool isDefm) { SubClassReference Result; - Result.RefLoc = Lex.getLoc(); + Result.RefRange.Start = Lex.getLoc(); if (isDefm) { if (MultiClass *MC = ParseMultiClassID()) @@ -475,8 +478,10 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { if (Result.Rec == 0) return Result; // If there is no template arg list, we're done. 
- if (Lex.getCode() != tgtok::less) + if (Lex.getCode() != tgtok::less) { + Result.RefRange.End = Lex.getLoc(); return Result; + } Lex.Lex(); // Eat the '<' if (Lex.getCode() == tgtok::greater) { @@ -497,6 +502,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { return Result; } Lex.Lex(); + Result.RefRange.End = Lex.getLoc(); return Result; } @@ -511,14 +517,16 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { SubMultiClassReference TGParser:: ParseSubMultiClassReference(MultiClass *CurMC) { SubMultiClassReference Result; - Result.RefLoc = Lex.getLoc(); + Result.RefRange.Start = Lex.getLoc(); Result.MC = ParseMultiClassID(); if (Result.MC == 0) return Result; // If there is no template arg list, we're done. - if (Lex.getCode() != tgtok::less) + if (Lex.getCode() != tgtok::less) { + Result.RefRange.End = Lex.getLoc(); return Result; + } Lex.Lex(); // Eat the '<' if (Lex.getCode() == tgtok::greater) { @@ -539,6 +547,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) { return Result; } Lex.Lex(); + Result.RefRange.End = Lex.getLoc(); return Result; } @@ -1199,6 +1208,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, return 0; } Lex.Lex(); // eat the '>' + SMLoc EndLoc = Lex.getLoc(); // Create the new record, set it as CurRec temporarily. static unsigned AnonCounter = 0; @@ -1207,7 +1217,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, Records, /*IsAnonymous=*/true); SubClassReference SCRef; - SCRef.RefLoc = NameLoc; + SCRef.RefRange = SMRange(NameLoc, EndLoc); SCRef.Rec = Class; SCRef.TemplateArgs = ValueList; // Add info about the subclass to NewRec. @@ -2246,7 +2256,7 @@ Record *TGParser:: InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, Init *DefmPrefix, - SMLoc DefmPrefixLoc) { + SMRange DefmPrefixRange) { // We need to preserve DefProto so it can be reused for later // instantiations, so create a new Record to inherit from it. @@ -2276,21 +2286,21 @@ InstantiateMulticlassDef(MultiClass &MC, } // Make a trail of SMLocs from the multiclass instantiations. - SmallVector Locs(1, DefmPrefixLoc); + SmallVector Locs(1, DefmPrefixRange.Start); Locs.append(DefProto->getLoc().begin(), DefProto->getLoc().end()); Record *CurRec = new Record(DefName, Locs, Records, IsAnonymous); SubClassReference Ref; - Ref.RefLoc = DefmPrefixLoc; + Ref.RefRange = DefmPrefixRange; Ref.Rec = DefProto; AddSubClass(CurRec, Ref); // Set the value for NAME. We don't resolve references to it 'til later, // though, so that uses in nested multiclass names don't get // confused. - if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector(), + if (SetValue(CurRec, Ref.RefRange.Start, "NAME", std::vector(), DefmPrefix)) { - Error(DefmPrefixLoc, "Could not resolve " + Error(DefmPrefixRange.Start, "Could not resolve " + CurRec->getNameInitAsString() + ":NAME to '" + DefmPrefix->getAsUnquotedString() + "'"); return 0; @@ -2321,7 +2331,7 @@ InstantiateMulticlassDef(MultiClass &MC, // Ensure redefinition doesn't happen. 
if (Records.getDef(CurRec->getNameInitAsString())) { - Error(DefmPrefixLoc, "def '" + CurRec->getNameInitAsString() + + Error(DefmPrefixRange.Start, "def '" + CurRec->getNameInitAsString() + "' already defined, instantiating defm with subdef '" + DefProto->getNameInitAsString() + "'"); return 0; @@ -2407,14 +2417,14 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC, /// bool TGParser::ParseDefm(MultiClass *CurMultiClass) { assert(Lex.getCode() == tgtok::Defm && "Unexpected token!"); - + SMLoc DefmLoc = Lex.getLoc(); Init *DefmPrefix = 0; if (Lex.Lex() == tgtok::Id) { // eat the defm. DefmPrefix = ParseObjectName(CurMultiClass); } - SMLoc DefmPrefixLoc = Lex.getLoc(); + SMLoc DefmPrefixEndLoc = Lex.getLoc(); if (Lex.getCode() != tgtok::colon) return TokError("expected ':' after defm identifier"); @@ -2450,15 +2460,17 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) { Record *DefProto = MC->DefPrototypes[i]; - Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc); + Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, + SMRange(DefmLoc, + DefmPrefixEndLoc)); if (!CurRec) return true; - if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc, + if (ResolveMulticlassDefArgs(*MC, CurRec, DefmLoc, SubClassLoc, TArgs, TemplateVals, true/*Delete args*/)) return Error(SubClassLoc, "could not instantiate def"); - if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmPrefixLoc)) + if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmLoc)) return Error(SubClassLoc, "could not instantiate def"); NewRecDefs.push_back(CurRec); diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index 215cbfc..e55805d 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -134,7 +134,7 @@ private: // Parser methods. Record *InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, Init *DefmPrefix, - SMLoc DefmPrefixLoc); + SMRange DefmPrefixRange); bool ResolveMulticlassDefArgs(MultiClass &MC, Record *DefProto, SMLoc DefmPrefixLoc, -- cgit v1.1 From 9b1f44b147ff943729207be2b0509f6e53d62bbb Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Thu, 10 Jan 2013 18:50:15 +0000 Subject: Add basic fix-its to SMDiagnostic. Like Clang's FixItHint, SMFixIt represents an insertion, replacement, or removal of source text. One or more fix-its can be emitted as part of a diagnostic, and will be printed below the source range line to show the user how they can fix their code. Currently, the only client of SMFixIt is clang-tblgen; thus, the tests for this behavior live in clang/test/TableGen/tg-fixits.td. If/when SMFixIt is adopted within LLVM itself, those tests should be moved to the LLVM suite. 
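For a rough picture of the output this enables, a standalone sketch, simplified from the buildFixItLine logic in the diff below, with overlapping-hint merging and multi-line ranges omitted, that renders one replacement hint under a source line:

  #include <cstddef>
  #include <iostream>
  #include <string>

  // Render one replacement fix-it: '~' under the columns being replaced,
  // suggested text on the line below. Columns are 0-based, LastCol exclusive.
  static void printFixIt(const std::string &SrcLine, std::size_t FirstCol,
                         std::size_t LastCol, const std::string &Text) {
    std::string CaretLine(SrcLine.size(), ' ');
    for (std::size_t i = FirstCol; i < LastCol && i < CaretLine.size(); ++i)
      CaretLine[i] = '~';
    std::string FixItLine(FirstCol, ' ');
    FixItLine += Text;
    std::cout << SrcLine << '\n' << CaretLine << '\n' << FixItLine << '\n';
  }

  int main() {
    printFixIt("def foo : bar;", 10, 13, "baz"); // suggest "bar" -> "baz"
  }

Running this prints the source line, "~~~" under "bar", and "baz" beneath it, matching the example in the commit message above.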
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172086 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/SourceMgr.cpp | 190 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 158 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 6540319..58a7713 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -15,12 +15,16 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" using namespace llvm; +static const size_t TabStop = 8; + namespace { struct LineNoCacheTy { int LastQueryBufferID; @@ -146,7 +150,8 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { /// prefixed to the message. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, - ArrayRef Ranges) const { + ArrayRef Ranges, + ArrayRef FixIts) const { // First thing to do: find the current buffer containing the specified // location to pull out the source line. @@ -193,6 +198,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, R.End = SMLoc::getFromPointer(LineEnd); // Translate from SMLoc ranges to column ranges. + // FIXME: Handle multibyte characters. ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, R.End.getPointer()-LineStart)); } @@ -202,13 +208,13 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, LineAndCol.second-1, Kind, Msg.str(), - LineStr, ColRanges); + LineStr, ColRanges, FixIts); } void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef Ranges, - bool ShowColors) const { - SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges); + ArrayRef FixIts, bool ShowColors) const { + SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts); // Report the message with the diagnostic handler if present. if (DiagHandler) { @@ -231,15 +237,104 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, // SMDiagnostic Implementation //===----------------------------------------------------------------------===// -SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, +SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, int Col, SourceMgr::DiagKind Kind, - const std::string &Msg, - const std::string &LineStr, - ArrayRef > Ranges) + StringRef Msg, StringRef LineStr, + ArrayRef > Ranges, + ArrayRef Hints) : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind), - Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()) { + Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()), + FixIts(Hints.begin(), Hints.end()) { + std::sort(FixIts.begin(), FixIts.end()); +} + +void buildFixItLine(std::string &CaretLine, std::string &FixItLine, + ArrayRef FixIts, ArrayRef SourceLine) { + if (FixIts.empty()) + return; + + const char *LineStart = SourceLine.begin(); + const char *LineEnd = SourceLine.end(); + + size_t PrevHintEndCol = 0; + + for (ArrayRef::iterator I = FixIts.begin(), E = FixIts.end(); + I != E; ++I) { + // If the fixit contains a newline or tab, ignore it. 
+ if (I->getText().find_first_of("\n\r\t") != StringRef::npos) + continue; + + SMRange R = I->getRange(); + + // If the line doesn't contain any part of the range, then ignore it. + if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) + continue; + + // Translate from SMLoc to column. + // Ignore pieces of the range that go onto other lines. + // FIXME: Handle multibyte characters in the source line. + unsigned FirstCol; + if (R.Start.getPointer() < LineStart) + FirstCol = 0; + else + FirstCol = R.Start.getPointer() - LineStart; + + // If we inserted a long previous hint, push this one forwards, and add + // an extra space to show that this is not part of the previous + // completion. This is sort of the best we can do when two hints appear + // to overlap. + // + // Note that if this hint is located immediately after the previous + // hint, no space will be added, since the location is more important. + unsigned HintCol = FirstCol; + if (HintCol < PrevHintEndCol) + HintCol = PrevHintEndCol + 1; + + // FIXME: This assertion is intended to catch unintended use of multibyte + // characters in fixits. If we decide to do this, we'll have to track + // separate byte widths for the source and fixit lines. + assert((size_t)llvm::sys::locale::columnWidth(I->getText()) == + I->getText().size()); + + // This relies on one byte per column in our fixit hints. + unsigned LastColumnModified = HintCol + I->getText().size(); + if (LastColumnModified > FixItLine.size()) + FixItLine.resize(LastColumnModified, ' '); + + std::copy(I->getText().begin(), I->getText().end(), + FixItLine.begin() + HintCol); + + PrevHintEndCol = LastColumnModified; + + // For replacements, mark the removal range with '~'. + // FIXME: Handle multibyte characters in the source line. + unsigned LastCol; + if (R.End.getPointer() >= LineEnd) + LastCol = LineEnd - LineStart; + else + LastCol = R.End.getPointer() - LineStart; + + std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~'); + } } +static void printSourceLine(raw_ostream &S, StringRef LineContents) { + // Print out the source line one character at a time, so we can expand tabs. + for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { + if (LineContents[i] != '\t') { + S << LineContents[i]; + ++OutCol; + continue; + } + + // If we have a tab, emit at least one space, then round up to 8 columns. + do { + S << ' '; + ++OutCol; + } while ((OutCol % TabStop) != 0); + } + S << '\n'; +} void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors) const { @@ -297,43 +392,49 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, if (LineNo == -1 || ColumnNo == -1) return; + // FIXME: If there are multibyte characters in the source, all our ranges will + // be wrong. To do this properly, we'll need a byte-to-column map like Clang's + // TextDiagnostic. For now, we'll just handle tabs by expanding them later, + // and bail out rather than show incorrect ranges and misaligned fixits for + // any other odd characters. + SmallString<128> PrintableLine(LineContents); + std::replace(PrintableLine.begin(), PrintableLine.end(), '\t', ' '); + size_t NumColumns = (size_t)llvm::sys::locale::columnWidth(PrintableLine); + if (NumColumns != PrintableLine.size()) { + printSourceLine(S, LineContents); + return; + } + // Build the line with the caret and ranges. - std::string CaretLine(LineContents.size()+1, ' '); + std::string CaretLine(NumColumns+1, ' '); // Expand any ranges. 
for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { std::pair R = Ranges[r]; - for (unsigned i = R.first, - e = std::min(R.second, (unsigned)LineContents.size()); i != e; ++i) - CaretLine[i] = '~'; + std::fill(&CaretLine[R.first], + &CaretLine[std::min((size_t)R.second, CaretLine.size())], + '~'); } - + + // Add any fix-its. + // FIXME: Find the beginning of the line properly for multibyte characters. + std::string FixItInsertionLine; + buildFixItLine(CaretLine, FixItInsertionLine, FixIts, + makeArrayRef(Loc.getPointer() - ColumnNo, + LineContents.size())); + // Finally, plop on the caret. - if (unsigned(ColumnNo) <= LineContents.size()) + if (unsigned(ColumnNo) <= NumColumns) CaretLine[ColumnNo] = '^'; else - CaretLine[LineContents.size()] = '^'; + CaretLine[NumColumns] = '^'; // ... and remove trailing whitespace so the output doesn't wrap for it. We // know that the line isn't completely empty because it has the caret in it at // least. CaretLine.erase(CaretLine.find_last_not_of(' ')+1); - // Print out the source line one character at a time, so we can expand tabs. - for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { - if (LineContents[i] != '\t') { - S << LineContents[i]; - ++OutCol; - continue; - } - - // If we have a tab, emit at least one space, then round up to 8 columns. - do { - S << ' '; - ++OutCol; - } while (OutCol & 7); - } - S << '\n'; + printSourceLine(S, LineContents); if (ShowColors) S.changeColor(raw_ostream::GREEN, true); @@ -350,11 +451,36 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, do { S << CaretLine[i]; ++OutCol; - } while (OutCol & 7); + } while ((OutCol % TabStop) != 0); } + S << '\n'; if (ShowColors) S.resetColor(); + + // Print out the replacement line, matching tabs in the source line. + if (FixItInsertionLine.empty()) + return; + for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) { + if (i >= LineContents.size() || LineContents[i] != '\t') { + S << FixItInsertionLine[i]; + ++OutCol; + continue; + } + + // Okay, we have a tab. Insert the appropriate number of characters. + do { + S << FixItInsertionLine[i]; + // FIXME: This is trying not to break up replacements, but then to re-sync + // with the tabs between replacements. This will fail, though, if two + // fix-it replacements are exactly adjacent, or if a fix-it contains a + // space. Really we should be precomputing column widths, which we'll + // need anyway for multibyte chars. + if (FixItInsertionLine[i] != ' ') + ++i; + ++OutCol; + } while (((OutCol % TabStop) != 0) && i != e); + } S << '\n'; } -- cgit v1.1 From e9d4eba45122892670fe24458bf161e559a799e7 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 10 Jan 2013 22:06:52 +0000 Subject: Teach InstCombine to hoist FABS and FNEG through FPTRUNC instructions. The application of these operations commutes with the truncation, so we should prefer to do them in the smallest size we can, to save register space, use smaller constant pool entries, etc. 
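
Concretely, the rewrite looks like this (an illustrative sketch, not taken from the commit; the function names are invented and the exact IR depends on the frontend):

    // Before: negate in double, then truncate.
    //   %n = fsub double -0.000000e+00, %d
    //   %t = fptrunc double %n to float
    // After: truncate first, then negate in float. Negation commutes exactly
    // with the rounding done by fptrunc, so this is always safe.
    //   %t = fptrunc double %d to float
    //   %n = fsub float -0.000000e+00, %t
    float neg_then_trunc(double d) { return (float)(-d); }

    // Likewise (fptrunc (fabs x)) -> (fabs (fptrunc x)), with llvm.fabs
    // re-declared at the narrower type via Intrinsic::getDeclaration.
    float fabs_then_trunc(double d) { return (float)__builtin_fabs(d); }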
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172117 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 28 ++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 5af4442..c782032 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1204,8 +1204,34 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } break; } + + // (fptrunc (fneg x)) -> (fneg (fptrunc x)) + if (BinaryOperator::isFNeg(OpI)) { + Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), + CI.getType()); + return BinaryOperator::CreateFNeg(InnerTrunc); + } } - + + IntrinsicInst *II = dyn_cast(CI.getOperand(0)); + if (II) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: { + // (fptrunc (fabs x)) -> (fabs (fptrunc x)) + Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), + CI.getType()); + Type *IntrinsicType[] = { CI.getType() }; + Function *Overload = + Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), + II->getIntrinsicID(), IntrinsicType); + + Value *Args[] = { InnerTrunc }; + return CallInst::Create(Overload, Args, II->getName()); + } + } + } + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) CallInst *Call = dyn_cast(CI.getOperand(0)); if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && -- cgit v1.1 From c1ec207b615cb058d30dc642ee311ed06fe59cfe Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 10 Jan 2013 22:10:27 +0000 Subject: [ms-inline asm] Add support for calling functions from inline assembly. Part of rdar://12991541 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 25 +++++++++++----------- lib/Target/X86/AsmParser/X86AsmParser.cpp | 35 ++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 7c3fea5..64a0885 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3777,8 +3777,8 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, MCAsmParserSemaCallback &SI) { SmallVector InputDecls; SmallVector OutputDecls; - SmallVector InputDeclsOffsetOf; - SmallVector OutputDeclsOffsetOf; + SmallVector InputDeclsAddressOf; + SmallVector OutputDeclsAddressOf; SmallVector InputConstraints; SmallVector OutputConstraints; std::set ClobberRegs; @@ -3815,7 +3815,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, } // Register operand. - if (Operand->isReg() && !Operand->isOffsetOf()) { + if (Operand->isReg() && !Operand->needAddressOf()) { unsigned NumDefs = Desc.getNumDefs(); // Clobber. if (NumDefs && Operand->getMCOperandNum() < NumDefs) { @@ -3829,11 +3829,12 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Expr/Input or Output. 
unsigned Size; + bool IsVarDecl; void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, - Size); + Size, IsVarDecl); if (OpDecl) { bool isOutput = (i == 1) && Desc.mayStore(); - if (!Operand->isOffsetOf() && Operand->needSizeDirective()) + if (Operand->isMem() && Operand->needSizeDirective()) AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, Operand->getStartLoc(), /*Len*/0, @@ -3842,7 +3843,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, std::string Constraint = "="; ++InputIdx; OutputDecls.push_back(OpDecl); - OutputDeclsOffsetOf.push_back(Operand->isOffsetOf()); + OutputDeclsAddressOf.push_back(Operand->needAddressOf()); Constraint += Operand->getConstraint().str(); OutputConstraints.push_back(Constraint); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, @@ -3850,7 +3851,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, Operand->getNameLen())); } else { InputDecls.push_back(OpDecl); - InputDeclsOffsetOf.push_back(Operand->isOffsetOf()); + InputDeclsAddressOf.push_back(Operand->needAddressOf()); InputConstraints.push_back(Operand->getConstraint().str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(), @@ -3876,14 +3877,14 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, OpDecls.resize(NumExprs); Constraints.resize(NumExprs); // FIXME: Constraints are hard coded to 'm', but we need an 'r' - // constraint for offsetof. This needs to be cleaned up! + // constraint for addressof. This needs to be cleaned up! for (unsigned i = 0; i < NumOutputs; ++i) { - OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsOffsetOf[i]); - Constraints[i] = OutputDeclsOffsetOf[i] ? "=r" : OutputConstraints[i]; + OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]); + Constraints[i] = OutputDeclsAddressOf[i] ? "=r" : OutputConstraints[i]; } for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) { - OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsOffsetOf[i]); - Constraints[j] = InputDeclsOffsetOf[i] ? "r" : InputConstraints[i]; + OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]); + Constraints[j] = InputDeclsAddressOf[i] ? 
"r" : InputConstraints[i]; } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 5007803..d26254b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -168,6 +168,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + bool AddressOf; union { struct { @@ -340,6 +341,10 @@ struct X86Operand : public MCParsedAsmOperand { return OffsetOfLoc.getPointer(); } + bool needAddressOf() const { + return AddressOf; + } + bool needSizeDirective() const { assert(Kind == Memory && "Invalid access!"); return Mem.NeedSizeDir; @@ -471,9 +476,11 @@ struct X86Operand : public MCParsedAsmOperand { } static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc()) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; return Res; } @@ -836,24 +843,39 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { return 0; bool NeedSizeDir = false; - if (!Size && isParsingInlineAsm()) { + bool IsVarDecl = false; + if (isParsingInlineAsm()) { if (const MCSymbolRefExpr *SymRef = dyn_cast(Disp)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size); + unsigned tSize; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tSize, + IsVarDecl); + if (!Size) + Size = tSize; NeedSizeDir = Size > 0; } } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else + else { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + // When parsing inline assembly we set the base register to a non-zero value // as we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, /*Scale*/1, Start, End, Size, NeedSizeDir); + } } /// Parse the '.' operator. @@ -929,7 +951,8 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // register operand to ensure proper matching. Just pick a GPR based on // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); + return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, + OffsetOfLoc); } /// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or @@ -952,7 +975,9 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) + bool IsVarDecl; + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size, + IsVarDecl)) return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); Size /= 8; // Size is in terms of bits, but we want bytes in the context. 
-- cgit v1.1 From 81bfd711de84310232354dcc971b8ec45206716a Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Thu, 10 Jan 2013 22:13:13 +0000 Subject: Remove unneeded includes from FunctionLoweringInfo.h. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ee98b00..599ef2c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,9 +15,9 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" -- cgit v1.1 From 4ff23d09fa49d4ad68b4613114a743771df21df3 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 10 Jan 2013 22:13:27 +0000 Subject: PR14896: Handle memcpy from constant string where the memcpy size is larger than the string size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172124 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6c29c67..344d144 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3374,10 +3374,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumVTBits = VT.getSizeInBits(); + unsigned NumVTBytes = NumVTBits / 8; unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); - APInt Val(NumBytes*8, 0); + APInt Val(NumVTBits, 0); if (TLI.isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; -- cgit v1.1 From 03de2a621c2f063294eb3f65ea1136f3acac4e98 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Jan 2013 22:36:33 +0000 Subject: [msan] Change va_start/va_copy shadow memset alignment to 8. This fixes va_start/va_copy of a va_list field which happens to not be laid out at a 16-byte boundary. Differential Revision: http://llvm-reviews.chandlerc.com/D276 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172128 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 58d5801..76da970 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1774,7 +1774,7 @@ struct VarArgAMD64Helper : public VarArgHelper { // Unpoison the whole __va_list_tag. // FIXME: magic ABI constants. 
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */24, /* alignment */16, false); + /* size */24, /* alignment */8, false); } void visitVACopyInst(VACopyInst &I) { @@ -1785,7 +1785,7 @@ struct VarArgAMD64Helper : public VarArgHelper { // Unpoison the whole __va_list_tag. // FIXME: magic ABI constants. IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */ 24, /* alignment */ 16, false); + /* size */24, /* alignment */8, false); } void finalizeInstrumentation() { -- cgit v1.1 From 5d0f0617799c5d200f5a1679423e963c27ac2e8b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 10 Jan 2013 22:44:57 +0000 Subject: Remove a couple of if-else chains in parsing directives, replacing them by a switch. Committed with Jim's and Chris's approval. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172136 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 389 ++++++++++++++++++++++++++---------------- 1 file changed, 240 insertions(+), 149 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 64a0885..6276a18 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -292,7 +292,26 @@ private: // Directive Parsing. - // ".ascii", ".asciiz", ".string" + enum DirectiveKind { + VK_NO_DIRECTIVE, // Placeholder + VK_SET, VK_EQU, VK_EQUIV, VK_ASCII, VK_ASCIZ, VK_STRING, VK_BYTE, VK_SHORT, + VK_VALUE, VK_2BYTE, VK_LONG, VK_INT, VK_4BYTE, VK_QUAD, VK_8BYTE, VK_SINGLE, + VK_FLOAT, VK_DOUBLE, VK_ALIGN, VK_ALIGN32, VK_BALIGN, VK_BALIGNW, + VK_BALIGNL, VK_P2ALIGN, VK_P2ALIGNW, VK_P2ALIGNL, VK_ORG, VK_FILL, + VK_SPACE, VK_SKIP, VK_ENDR, + VK_BUNDLE_ALIGN_MODE, VK_BUNDLE_LOCK, VK_BUNDLE_UNLOCK, + VK_ZERO, VK_EXTERN, VK_GLOBL, VK_GLOBAL, VK_INDIRECT_SYMBOL, + VK_LAZY_REFERENCE, VK_NO_DEAD_STRIP, VK_SYMBOL_RESOLVER, VK_PRIVATE_EXTERN, + VK_REFERENCE, VK_WEAK_DEFINITION, VK_WEAK_REFERENCE, + VK_WEAK_DEF_CAN_BE_HIDDEN, VK_COMM, VM_COMMON, VK_LCOMM, VK_ABORT, + VK_INCLUDE, VK_INCBIN, VK_CODE16, VK_CODE16GCC, VK_REPT, VK_IRP, VK_IRPC, + VK_IF, VK_IFB, VK_IFNB, VK_IFC, VK_IFNC, VK_IFDEF, VK_IFNDEF, VK_IFNOTDEF, + VK_ELSEIF, VK_ELSE, VK_ENDIF + }; + + StringMap DirectiveKindMapping; + + // ".ascii", ".asciz", ".string" bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ... @@ -351,6 +370,8 @@ private: // "_emit" bool ParseDirectiveEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info); + + void initializeDirectiveKindMapping(); }; /// \brief Generic implementations of directive handling, etc. which is shared @@ -508,6 +529,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, PlatformParser = createELFAsmParser(); PlatformParser->Initialize(*this); } + + initializeDirectiveKindMapping(); } AsmParser::~AsmParser() { @@ -1155,30 +1178,39 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { IDVal = ""; } - // Handle conditional assembly here before checking for skipping. We // have to do this so that .endif isn't skipped in a ".if 0" block for // example. 
- if (IDVal == ".if") - return ParseDirectiveIf(IDLoc); - if (IDVal == ".ifb") - return ParseDirectiveIfb(IDLoc, true); - if (IDVal == ".ifnb") - return ParseDirectiveIfb(IDLoc, false); - if (IDVal == ".ifc") - return ParseDirectiveIfc(IDLoc, true); - if (IDVal == ".ifnc") - return ParseDirectiveIfc(IDLoc, false); - if (IDVal == ".ifdef") - return ParseDirectiveIfdef(IDLoc, true); - if (IDVal == ".ifndef" || IDVal == ".ifnotdef") - return ParseDirectiveIfdef(IDLoc, false); - if (IDVal == ".elseif") - return ParseDirectiveElseIf(IDLoc); - if (IDVal == ".else") - return ParseDirectiveElse(IDLoc); - if (IDVal == ".endif") - return ParseDirectiveEndIf(IDLoc); + StringMap::const_iterator DirKindIt = + DirectiveKindMapping.find(IDVal); + DirectiveKind DirKind = + (DirKindIt == DirectiveKindMapping.end()) ? VK_NO_DIRECTIVE : + DirKindIt->getValue(); + switch (DirKind) { + default: + break; + case VK_IF: + return ParseDirectiveIf(IDLoc); + case VK_IFB: + return ParseDirectiveIfb(IDLoc, true); + case VK_IFNB: + return ParseDirectiveIfb(IDLoc, false); + case VK_IFC: + return ParseDirectiveIfc(IDLoc, true); + case VK_IFNC: + return ParseDirectiveIfc(IDLoc, false); + case VK_IFDEF: + return ParseDirectiveIfdef(IDLoc, true); + case VK_IFNDEF: + case VK_IFNOTDEF: + return ParseDirectiveIfdef(IDLoc, false); + case VK_ELSEIF: + return ParseDirectiveElseIf(IDLoc); + case VK_ELSE: + return ParseDirectiveElse(IDLoc); + case VK_ENDIF: + return ParseDirectiveEndIf(IDLoc); + } // If we are in a ".if 0" block, ignore this statement. if (TheCondState.Ignore) { @@ -1256,139 +1288,126 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { if (!getTargetParser().ParseDirective(ID)) return false; - // Assembler features - if (IDVal == ".set" || IDVal == ".equ") - return ParseDirectiveSet(IDVal, true); - if (IDVal == ".equiv") - return ParseDirectiveSet(IDVal, false); - - // Data directives - - if (IDVal == ".ascii") - return ParseDirectiveAscii(IDVal, false); - if (IDVal == ".asciz" || IDVal == ".string") - return ParseDirectiveAscii(IDVal, true); - - if (IDVal == ".byte") - return ParseDirectiveValue(1); - if (IDVal == ".short") - return ParseDirectiveValue(2); - if (IDVal == ".value") - return ParseDirectiveValue(2); - if (IDVal == ".2byte") - return ParseDirectiveValue(2); - if (IDVal == ".long") - return ParseDirectiveValue(4); - if (IDVal == ".int") - return ParseDirectiveValue(4); - if (IDVal == ".4byte") - return ParseDirectiveValue(4); - if (IDVal == ".quad") - return ParseDirectiveValue(8); - if (IDVal == ".8byte") - return ParseDirectiveValue(8); - if (IDVal == ".single" || IDVal == ".float") - return ParseDirectiveRealValue(APFloat::IEEEsingle); - if (IDVal == ".double") - return ParseDirectiveRealValue(APFloat::IEEEdouble); - - if (IDVal == ".align") { - bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); - return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); - } - if (IDVal == ".align32") { - bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); - return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); - } - if (IDVal == ".balign") - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); - if (IDVal == ".balignw") - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); - if (IDVal == ".balignl") - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); - if (IDVal == ".p2align") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - if (IDVal == ".p2alignw") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); - if (IDVal == 
".p2alignl") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - - if (IDVal == ".bundle_align_mode") - return ParseDirectiveBundleAlignMode(); - if (IDVal == ".bundle_lock") - return ParseDirectiveBundleLock(); - if (IDVal == ".bundle_unlock") - return ParseDirectiveBundleUnlock(); - - if (IDVal == ".org") - return ParseDirectiveOrg(); - - if (IDVal == ".fill") - return ParseDirectiveFill(); - if (IDVal == ".space" || IDVal == ".skip") - return ParseDirectiveSpace(); - if (IDVal == ".zero") - return ParseDirectiveZero(); - - // Symbol attribute directives - - if (IDVal == ".extern") { - EatToEndOfStatement(); // .extern is the default, ignore it. - return false; + switch (DirKind) { + default: + break; + case VK_SET: + case VK_EQU: + return ParseDirectiveSet(IDVal, true); + case VK_EQUIV: + return ParseDirectiveSet(IDVal, false); + case VK_ASCII: + return ParseDirectiveAscii(IDVal, false); + case VK_ASCIZ: + case VK_STRING: + return ParseDirectiveAscii(IDVal, true); + case VK_BYTE: + return ParseDirectiveValue(1); + case VK_SHORT: + case VK_VALUE: + case VK_2BYTE: + return ParseDirectiveValue(2); + case VK_LONG: + case VK_INT: + case VK_4BYTE: + return ParseDirectiveValue(4); + case VK_QUAD: + case VK_8BYTE: + return ParseDirectiveValue(8); + case VK_SINGLE: + case VK_FLOAT: + return ParseDirectiveRealValue(APFloat::IEEEsingle); + case VK_DOUBLE: + return ParseDirectiveRealValue(APFloat::IEEEdouble); + case VK_ALIGN: { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); + } + case VK_ALIGN32: { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); + } + case VK_BALIGN: + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + case VK_BALIGNW: + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + case VK_BALIGNL: + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + case VK_P2ALIGN: + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + case VK_P2ALIGNW: + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + case VK_P2ALIGNL: + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + case VK_ORG: + return ParseDirectiveOrg(); + case VK_FILL: + return ParseDirectiveFill(); + case VK_SPACE: + case VK_SKIP: + return ParseDirectiveSpace(); + case VK_ZERO: + return ParseDirectiveZero(); + case VK_EXTERN: + EatToEndOfStatement(); // .extern is the default, ignore it. 
+ return false; + case VK_GLOBL: + case VK_GLOBAL: + return ParseDirectiveSymbolAttribute(MCSA_Global); + case VK_INDIRECT_SYMBOL: + return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); + case VK_LAZY_REFERENCE: + return ParseDirectiveSymbolAttribute(MCSA_LazyReference); + case VK_NO_DEAD_STRIP: + return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); + case VK_SYMBOL_RESOLVER: + return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver); + case VK_PRIVATE_EXTERN: + return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); + case VK_REFERENCE: + return ParseDirectiveSymbolAttribute(MCSA_Reference); + case VK_WEAK_DEFINITION: + return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); + case VK_WEAK_REFERENCE: + return ParseDirectiveSymbolAttribute(MCSA_WeakReference); + case VK_WEAK_DEF_CAN_BE_HIDDEN: + return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); + case VK_COMM: + case VM_COMMON: + return ParseDirectiveComm(/*IsLocal=*/false); + case VK_LCOMM: + return ParseDirectiveComm(/*IsLocal=*/true); + case VK_ABORT: + return ParseDirectiveAbort(); + case VK_INCLUDE: + return ParseDirectiveInclude(); + case VK_INCBIN: + return ParseDirectiveIncbin(); + case VK_CODE16: + case VK_CODE16GCC: + return TokError(Twine(IDVal) + " not supported yet"); + case VK_REPT: + return ParseDirectiveRept(IDLoc); + case VK_IRP: + return ParseDirectiveIrp(IDLoc); + case VK_IRPC: + return ParseDirectiveIrpc(IDLoc); + case VK_ENDR: + return ParseDirectiveEndr(IDLoc); + case VK_BUNDLE_ALIGN_MODE: + return ParseDirectiveBundleAlignMode(); + case VK_BUNDLE_LOCK: + return ParseDirectiveBundleLock(); + case VK_BUNDLE_UNLOCK: + return ParseDirectiveBundleUnlock(); } - if (IDVal == ".globl" || IDVal == ".global") - return ParseDirectiveSymbolAttribute(MCSA_Global); - if (IDVal == ".indirect_symbol") - return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); - if (IDVal == ".lazy_reference") - return ParseDirectiveSymbolAttribute(MCSA_LazyReference); - if (IDVal == ".no_dead_strip") - return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); - if (IDVal == ".symbol_resolver") - return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver); - if (IDVal == ".private_extern") - return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); - if (IDVal == ".reference") - return ParseDirectiveSymbolAttribute(MCSA_Reference); - if (IDVal == ".weak_definition") - return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); - if (IDVal == ".weak_reference") - return ParseDirectiveSymbolAttribute(MCSA_WeakReference); - if (IDVal == ".weak_def_can_be_hidden") - return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); - - if (IDVal == ".comm" || IDVal == ".common") - return ParseDirectiveComm(/*IsLocal=*/false); - if (IDVal == ".lcomm") - return ParseDirectiveComm(/*IsLocal=*/true); - - if (IDVal == ".abort") - return ParseDirectiveAbort(); - if (IDVal == ".include") - return ParseDirectiveInclude(); - if (IDVal == ".incbin") - return ParseDirectiveIncbin(); - - if (IDVal == ".code16" || IDVal == ".code16gcc") - return TokError(Twine(IDVal) + " not supported yet"); - - // Macro-like directives - if (IDVal == ".rept") - return ParseDirectiveRept(IDLoc); - if (IDVal == ".irp") - return ParseDirectiveIrp(IDLoc); - if (IDVal == ".irpc") - return ParseDirectiveIrpc(IDLoc); - if (IDVal == ".endr") - return ParseDirectiveEndr(IDLoc); - - // Look up the handler in the handler table. + + // Look up the handler in the extension handler table. 
std::pair Handler = DirectiveMap.lookup(IDVal); if (Handler.first) return (*Handler.second)(Handler.first, IDVal, IDLoc); - return Error(IDLoc, "unknown directive"); } @@ -2872,6 +2891,78 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { return false; } +void AsmParser::initializeDirectiveKindMapping() { + DirectiveKindMapping[".set"] = VK_SET; + DirectiveKindMapping[".equ"] = VK_EQU; + DirectiveKindMapping[".equiv"] = VK_EQUIV; + DirectiveKindMapping[".ascii"] = VK_ASCII; + DirectiveKindMapping[".asciz"] = VK_ASCIZ; + DirectiveKindMapping[".string"] = VK_STRING; + DirectiveKindMapping[".byte"] = VK_BYTE; + DirectiveKindMapping[".short"] = VK_SHORT; + DirectiveKindMapping[".value"] = VK_VALUE; + DirectiveKindMapping[".2byte"] = VK_2BYTE; + DirectiveKindMapping[".long"] = VK_LONG; + DirectiveKindMapping[".int"] = VK_INT; + DirectiveKindMapping[".4byte"] = VK_4BYTE; + DirectiveKindMapping[".quad"] = VK_QUAD; + DirectiveKindMapping[".8byte"] = VK_8BYTE; + DirectiveKindMapping[".single"] = VK_SINGLE; + DirectiveKindMapping[".float"] = VK_FLOAT; + DirectiveKindMapping[".double"] = VK_DOUBLE; + DirectiveKindMapping[".align"] = VK_ALIGN; + DirectiveKindMapping[".align32"] = VK_ALIGN32; + DirectiveKindMapping[".balign"] = VK_BALIGN; + DirectiveKindMapping[".balignw"] = VK_BALIGNW; + DirectiveKindMapping[".balignl"] = VK_BALIGNL; + DirectiveKindMapping[".p2align"] = VK_P2ALIGN; + DirectiveKindMapping[".p2alignw"] = VK_P2ALIGNW; + DirectiveKindMapping[".p2alignl"] = VK_P2ALIGNL; + DirectiveKindMapping[".org"] = VK_ORG; + DirectiveKindMapping[".fill"] = VK_FILL; + DirectiveKindMapping[".space"] = VK_SPACE; + DirectiveKindMapping[".skip"] = VK_SKIP; + DirectiveKindMapping[".zero"] = VK_ZERO; + DirectiveKindMapping[".extern"] = VK_EXTERN; + DirectiveKindMapping[".globl"] = VK_GLOBL; + DirectiveKindMapping[".global"] = VK_GLOBAL; + DirectiveKindMapping[".indirect_symbol"] = VK_INDIRECT_SYMBOL; + DirectiveKindMapping[".lazy_reference"] = VK_LAZY_REFERENCE; + DirectiveKindMapping[".no_dead_strip"] = VK_NO_DEAD_STRIP; + DirectiveKindMapping[".symbol_resolver"] = VK_SYMBOL_RESOLVER; + DirectiveKindMapping[".private_extern"] = VK_PRIVATE_EXTERN; + DirectiveKindMapping[".reference"] = VK_REFERENCE; + DirectiveKindMapping[".weak_definition"] = VK_WEAK_DEFINITION; + DirectiveKindMapping[".weak_reference"] = VK_WEAK_REFERENCE; + DirectiveKindMapping[".weak_def_can_be_hidden"] = VK_WEAK_DEF_CAN_BE_HIDDEN; + DirectiveKindMapping[".comm"] = VK_COMM; + DirectiveKindMapping[".common"] = VM_COMMON; + DirectiveKindMapping[".lcomm"] = VK_LCOMM; + DirectiveKindMapping[".abort"] = VK_ABORT; + DirectiveKindMapping[".include"] = VK_INCLUDE; + DirectiveKindMapping[".incbin"] = VK_INCBIN; + DirectiveKindMapping[".code16"] = VK_CODE16; + DirectiveKindMapping[".code16gcc"] = VK_CODE16GCC; + DirectiveKindMapping[".rept"] = VK_REPT; + DirectiveKindMapping[".irp"] = VK_IRP; + DirectiveKindMapping[".irpc"] = VK_IRPC; + DirectiveKindMapping[".endr"] = VK_ENDR; + DirectiveKindMapping[".bundle_align_mode"] = VK_BUNDLE_ALIGN_MODE; + DirectiveKindMapping[".bundle_lock"] = VK_BUNDLE_LOCK; + DirectiveKindMapping[".bundle_unlock"] = VK_BUNDLE_UNLOCK; + DirectiveKindMapping[".if"] = VK_IF; + DirectiveKindMapping[".ifb"] = VK_IFB; + DirectiveKindMapping[".ifnb"] = VK_IFNB; + DirectiveKindMapping[".ifc"] = VK_IFC; + DirectiveKindMapping[".ifnc"] = VK_IFNC; + DirectiveKindMapping[".ifdef"] = VK_IFDEF; + DirectiveKindMapping[".ifndef"] = VK_IFNDEF; + DirectiveKindMapping[".ifnotdef"] = VK_IFNOTDEF; + DirectiveKindMapping[".elseif"] = 
VK_ELSEIF;
+  DirectiveKindMapping[".else"] = VK_ELSE;
+  DirectiveKindMapping[".endif"] = VK_ENDIF;
+}
+
 /// ParseDirectiveFile
 /// ::= .file [number] filename
 /// ::= .file number directory filename
-- 
cgit v1.1


From 582e4f278b95d50a45c6f56e33da5e78c19afc17 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Thu, 10 Jan 2013 23:22:53 +0000
Subject: CastInst::castIsValid should return true if the dest type is the same
 as Value's current type. The casting is trivial even for aggregate types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172143 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Instructions.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'lib')

diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index f2e9813..26398ce 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -2624,6 +2624,11 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
 
   // Check for type sanity on the arguments
   Type *SrcTy = S->getType();
+
+  // If this is a cast to the same type then it's trivially true.
+  if (SrcTy == DstTy)
+    return true;
+
   if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
       SrcTy->isAggregateType() || DstTy->isAggregateType())
     return false;
-- 
cgit v1.1


From 253449db20f86b655852a397245ba16ff262452f Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Thu, 10 Jan 2013 23:32:01 +0000
Subject: PR14904: Segmentation fault running pass 'Recognize loop idioms'

The root cause is mistakenly taking for granted that "dyn_cast<Instruction>(a Value)"
returns a non-NULL instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172145 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c4f9012..8258719 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -407,7 +407,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
 
   // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
   {
-    if (DefX2->getOpcode() != Instruction::And)
+    if (!DefX2 || DefX2->getOpcode() != Instruction::And)
       return false;
 
     BinaryOperator *SubOneOp;
-- 
cgit v1.1


From 63e6f48e138fe52cd6268248b337f34d49fca886 Mon Sep 17 00:00:00 2001
From: Eli Bendersky
Date: Thu, 10 Jan 2013 23:32:57 +0000
Subject: fix comments a bit

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172146 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCParser/AsmParser.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 6276a18..b8c6f50 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -374,8 +374,8 @@ private:
   void initializeDirectiveKindMapping();
 };
 
-/// \brief Generic implementations of directive handling, etc. which is shared
-/// (or the default, at least) for all assembler parser.
+/// \brief Generic implementation of directive handling, etc. which is shared
+/// (or the default, at least) for all assembler parsers.
 class GenericAsmParser : public MCAsmParserExtension {
   template
   void AddDirectiveHandler(StringRef Directive) {
-- 
cgit v1.1


From 7109fbe98260d7a574762f5b9920d3d8e99052e3 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Thu, 10 Jan 2013 23:39:07 +0000
Subject: [ms-inline asm] Make sure we set a default value for AddressOf.
Follow on to r172121. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d26254b..8545a56 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -495,7 +495,7 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false){ + unsigned Size = 0, bool NeedSizeDir = false) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -504,6 +504,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = 1; Res->Mem.Size = Size; Res->Mem.NeedSizeDir = NeedSizeDir; + Res->AddressOf = false; return Res; } -- cgit v1.1 From 7eef9c1091760b8c0b3c0afd4df23d5044167a53 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 10 Jan 2013 23:40:56 +0000 Subject: Rename enumerations s/VK/DK/ to conform to naming convention git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172149 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 306 +++++++++++++++++++++--------------------- 1 file changed, 153 insertions(+), 153 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index b8c6f50..ecf35ff 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -293,20 +293,20 @@ private: // Directive Parsing. enum DirectiveKind { - VK_NO_DIRECTIVE, // Placeholder - VK_SET, VK_EQU, VK_EQUIV, VK_ASCII, VK_ASCIZ, VK_STRING, VK_BYTE, VK_SHORT, - VK_VALUE, VK_2BYTE, VK_LONG, VK_INT, VK_4BYTE, VK_QUAD, VK_8BYTE, VK_SINGLE, - VK_FLOAT, VK_DOUBLE, VK_ALIGN, VK_ALIGN32, VK_BALIGN, VK_BALIGNW, - VK_BALIGNL, VK_P2ALIGN, VK_P2ALIGNW, VK_P2ALIGNL, VK_ORG, VK_FILL, - VK_SPACE, VK_SKIP, VK_ENDR, - VK_BUNDLE_ALIGN_MODE, VK_BUNDLE_LOCK, VK_BUNDLE_UNLOCK, - VK_ZERO, VK_EXTERN, VK_GLOBL, VK_GLOBAL, VK_INDIRECT_SYMBOL, - VK_LAZY_REFERENCE, VK_NO_DEAD_STRIP, VK_SYMBOL_RESOLVER, VK_PRIVATE_EXTERN, - VK_REFERENCE, VK_WEAK_DEFINITION, VK_WEAK_REFERENCE, - VK_WEAK_DEF_CAN_BE_HIDDEN, VK_COMM, VM_COMMON, VK_LCOMM, VK_ABORT, - VK_INCLUDE, VK_INCBIN, VK_CODE16, VK_CODE16GCC, VK_REPT, VK_IRP, VK_IRPC, - VK_IF, VK_IFB, VK_IFNB, VK_IFC, VK_IFNC, VK_IFDEF, VK_IFNDEF, VK_IFNOTDEF, - VK_ELSEIF, VK_ELSE, VK_ENDIF + DK_NO_DIRECTIVE, // Placeholder + DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT, + DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_SINGLE, + DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW, + DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, + DK_SPACE, DK_SKIP, DK_ENDR, + DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK, + DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_INDIRECT_SYMBOL, + DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN, + DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE, + DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, + DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, + DK_IF, DK_IFB, DK_IFNB, DK_IFC, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, + DK_ELSEIF, DK_ELSE, DK_ENDIF }; StringMap DirectiveKindMapping; @@ -1184,31 +1184,31 @@ bool 
AsmParser::ParseStatement(ParseStatementInfo &Info) { StringMap::const_iterator DirKindIt = DirectiveKindMapping.find(IDVal); DirectiveKind DirKind = - (DirKindIt == DirectiveKindMapping.end()) ? VK_NO_DIRECTIVE : + (DirKindIt == DirectiveKindMapping.end()) ? DK_NO_DIRECTIVE : DirKindIt->getValue(); switch (DirKind) { default: break; - case VK_IF: + case DK_IF: return ParseDirectiveIf(IDLoc); - case VK_IFB: + case DK_IFB: return ParseDirectiveIfb(IDLoc, true); - case VK_IFNB: + case DK_IFNB: return ParseDirectiveIfb(IDLoc, false); - case VK_IFC: + case DK_IFC: return ParseDirectiveIfc(IDLoc, true); - case VK_IFNC: + case DK_IFNC: return ParseDirectiveIfc(IDLoc, false); - case VK_IFDEF: + case DK_IFDEF: return ParseDirectiveIfdef(IDLoc, true); - case VK_IFNDEF: - case VK_IFNOTDEF: + case DK_IFNDEF: + case DK_IFNOTDEF: return ParseDirectiveIfdef(IDLoc, false); - case VK_ELSEIF: + case DK_ELSEIF: return ParseDirectiveElseIf(IDLoc); - case VK_ELSE: + case DK_ELSE: return ParseDirectiveElse(IDLoc); - case VK_ENDIF: + case DK_ENDIF: return ParseDirectiveEndIf(IDLoc); } @@ -1291,114 +1291,114 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { switch (DirKind) { default: break; - case VK_SET: - case VK_EQU: + case DK_SET: + case DK_EQU: return ParseDirectiveSet(IDVal, true); - case VK_EQUIV: + case DK_EQUIV: return ParseDirectiveSet(IDVal, false); - case VK_ASCII: + case DK_ASCII: return ParseDirectiveAscii(IDVal, false); - case VK_ASCIZ: - case VK_STRING: + case DK_ASCIZ: + case DK_STRING: return ParseDirectiveAscii(IDVal, true); - case VK_BYTE: + case DK_BYTE: return ParseDirectiveValue(1); - case VK_SHORT: - case VK_VALUE: - case VK_2BYTE: + case DK_SHORT: + case DK_VALUE: + case DK_2BYTE: return ParseDirectiveValue(2); - case VK_LONG: - case VK_INT: - case VK_4BYTE: + case DK_LONG: + case DK_INT: + case DK_4BYTE: return ParseDirectiveValue(4); - case VK_QUAD: - case VK_8BYTE: + case DK_QUAD: + case DK_8BYTE: return ParseDirectiveValue(8); - case VK_SINGLE: - case VK_FLOAT: + case DK_SINGLE: + case DK_FLOAT: return ParseDirectiveRealValue(APFloat::IEEEsingle); - case VK_DOUBLE: + case DK_DOUBLE: return ParseDirectiveRealValue(APFloat::IEEEdouble); - case VK_ALIGN: { + case DK_ALIGN: { bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); } - case VK_ALIGN32: { + case DK_ALIGN32: { bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); } - case VK_BALIGN: + case DK_BALIGN: return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); - case VK_BALIGNW: + case DK_BALIGNW: return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); - case VK_BALIGNL: + case DK_BALIGNL: return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); - case VK_P2ALIGN: + case DK_P2ALIGN: return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - case VK_P2ALIGNW: + case DK_P2ALIGNW: return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); - case VK_P2ALIGNL: + case DK_P2ALIGNL: return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - case VK_ORG: + case DK_ORG: return ParseDirectiveOrg(); - case VK_FILL: + case DK_FILL: return ParseDirectiveFill(); - case VK_SPACE: - case VK_SKIP: + case DK_SPACE: + case DK_SKIP: return ParseDirectiveSpace(); - case VK_ZERO: + case DK_ZERO: return ParseDirectiveZero(); - case VK_EXTERN: + case DK_EXTERN: EatToEndOfStatement(); // .extern is the default, ignore it. 
return false; - case VK_GLOBL: - case VK_GLOBAL: + case DK_GLOBL: + case DK_GLOBAL: return ParseDirectiveSymbolAttribute(MCSA_Global); - case VK_INDIRECT_SYMBOL: + case DK_INDIRECT_SYMBOL: return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); - case VK_LAZY_REFERENCE: + case DK_LAZY_REFERENCE: return ParseDirectiveSymbolAttribute(MCSA_LazyReference); - case VK_NO_DEAD_STRIP: + case DK_NO_DEAD_STRIP: return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); - case VK_SYMBOL_RESOLVER: + case DK_SYMBOL_RESOLVER: return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver); - case VK_PRIVATE_EXTERN: + case DK_PRIVATE_EXTERN: return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); - case VK_REFERENCE: + case DK_REFERENCE: return ParseDirectiveSymbolAttribute(MCSA_Reference); - case VK_WEAK_DEFINITION: + case DK_WEAK_DEFINITION: return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); - case VK_WEAK_REFERENCE: + case DK_WEAK_REFERENCE: return ParseDirectiveSymbolAttribute(MCSA_WeakReference); - case VK_WEAK_DEF_CAN_BE_HIDDEN: + case DK_WEAK_DEF_CAN_BE_HIDDEN: return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); - case VK_COMM: - case VM_COMMON: + case DK_COMM: + case DK_COMMON: return ParseDirectiveComm(/*IsLocal=*/false); - case VK_LCOMM: + case DK_LCOMM: return ParseDirectiveComm(/*IsLocal=*/true); - case VK_ABORT: + case DK_ABORT: return ParseDirectiveAbort(); - case VK_INCLUDE: + case DK_INCLUDE: return ParseDirectiveInclude(); - case VK_INCBIN: + case DK_INCBIN: return ParseDirectiveIncbin(); - case VK_CODE16: - case VK_CODE16GCC: + case DK_CODE16: + case DK_CODE16GCC: return TokError(Twine(IDVal) + " not supported yet"); - case VK_REPT: + case DK_REPT: return ParseDirectiveRept(IDLoc); - case VK_IRP: + case DK_IRP: return ParseDirectiveIrp(IDLoc); - case VK_IRPC: + case DK_IRPC: return ParseDirectiveIrpc(IDLoc); - case VK_ENDR: + case DK_ENDR: return ParseDirectiveEndr(IDLoc); - case VK_BUNDLE_ALIGN_MODE: + case DK_BUNDLE_ALIGN_MODE: return ParseDirectiveBundleAlignMode(); - case VK_BUNDLE_LOCK: + case DK_BUNDLE_LOCK: return ParseDirectiveBundleLock(); - case VK_BUNDLE_UNLOCK: + case DK_BUNDLE_UNLOCK: return ParseDirectiveBundleUnlock(); } @@ -2892,75 +2892,75 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { } void AsmParser::initializeDirectiveKindMapping() { - DirectiveKindMapping[".set"] = VK_SET; - DirectiveKindMapping[".equ"] = VK_EQU; - DirectiveKindMapping[".equiv"] = VK_EQUIV; - DirectiveKindMapping[".ascii"] = VK_ASCII; - DirectiveKindMapping[".asciz"] = VK_ASCIZ; - DirectiveKindMapping[".string"] = VK_STRING; - DirectiveKindMapping[".byte"] = VK_BYTE; - DirectiveKindMapping[".short"] = VK_SHORT; - DirectiveKindMapping[".value"] = VK_VALUE; - DirectiveKindMapping[".2byte"] = VK_2BYTE; - DirectiveKindMapping[".long"] = VK_LONG; - DirectiveKindMapping[".int"] = VK_INT; - DirectiveKindMapping[".4byte"] = VK_4BYTE; - DirectiveKindMapping[".quad"] = VK_QUAD; - DirectiveKindMapping[".8byte"] = VK_8BYTE; - DirectiveKindMapping[".single"] = VK_SINGLE; - DirectiveKindMapping[".float"] = VK_FLOAT; - DirectiveKindMapping[".double"] = VK_DOUBLE; - DirectiveKindMapping[".align"] = VK_ALIGN; - DirectiveKindMapping[".align32"] = VK_ALIGN32; - DirectiveKindMapping[".balign"] = VK_BALIGN; - DirectiveKindMapping[".balignw"] = VK_BALIGNW; - DirectiveKindMapping[".balignl"] = VK_BALIGNL; - DirectiveKindMapping[".p2align"] = VK_P2ALIGN; - DirectiveKindMapping[".p2alignw"] = VK_P2ALIGNW; - DirectiveKindMapping[".p2alignl"] = VK_P2ALIGNL; - DirectiveKindMapping[".org"] = 
VK_ORG; - DirectiveKindMapping[".fill"] = VK_FILL; - DirectiveKindMapping[".space"] = VK_SPACE; - DirectiveKindMapping[".skip"] = VK_SKIP; - DirectiveKindMapping[".zero"] = VK_ZERO; - DirectiveKindMapping[".extern"] = VK_EXTERN; - DirectiveKindMapping[".globl"] = VK_GLOBL; - DirectiveKindMapping[".global"] = VK_GLOBAL; - DirectiveKindMapping[".indirect_symbol"] = VK_INDIRECT_SYMBOL; - DirectiveKindMapping[".lazy_reference"] = VK_LAZY_REFERENCE; - DirectiveKindMapping[".no_dead_strip"] = VK_NO_DEAD_STRIP; - DirectiveKindMapping[".symbol_resolver"] = VK_SYMBOL_RESOLVER; - DirectiveKindMapping[".private_extern"] = VK_PRIVATE_EXTERN; - DirectiveKindMapping[".reference"] = VK_REFERENCE; - DirectiveKindMapping[".weak_definition"] = VK_WEAK_DEFINITION; - DirectiveKindMapping[".weak_reference"] = VK_WEAK_REFERENCE; - DirectiveKindMapping[".weak_def_can_be_hidden"] = VK_WEAK_DEF_CAN_BE_HIDDEN; - DirectiveKindMapping[".comm"] = VK_COMM; - DirectiveKindMapping[".common"] = VM_COMMON; - DirectiveKindMapping[".lcomm"] = VK_LCOMM; - DirectiveKindMapping[".abort"] = VK_ABORT; - DirectiveKindMapping[".include"] = VK_INCLUDE; - DirectiveKindMapping[".incbin"] = VK_INCBIN; - DirectiveKindMapping[".code16"] = VK_CODE16; - DirectiveKindMapping[".code16gcc"] = VK_CODE16GCC; - DirectiveKindMapping[".rept"] = VK_REPT; - DirectiveKindMapping[".irp"] = VK_IRP; - DirectiveKindMapping[".irpc"] = VK_IRPC; - DirectiveKindMapping[".endr"] = VK_ENDR; - DirectiveKindMapping[".bundle_align_mode"] = VK_BUNDLE_ALIGN_MODE; - DirectiveKindMapping[".bundle_lock"] = VK_BUNDLE_LOCK; - DirectiveKindMapping[".bundle_unlock"] = VK_BUNDLE_UNLOCK; - DirectiveKindMapping[".if"] = VK_IF; - DirectiveKindMapping[".ifb"] = VK_IFB; - DirectiveKindMapping[".ifnb"] = VK_IFNB; - DirectiveKindMapping[".ifc"] = VK_IFC; - DirectiveKindMapping[".ifnc"] = VK_IFNC; - DirectiveKindMapping[".ifdef"] = VK_IFDEF; - DirectiveKindMapping[".ifndef"] = VK_IFNDEF; - DirectiveKindMapping[".ifnotdef"] = VK_IFNOTDEF; - DirectiveKindMapping[".elseif"] = VK_ELSEIF; - DirectiveKindMapping[".else"] = VK_ELSE; - DirectiveKindMapping[".endif"] = VK_ENDIF; + DirectiveKindMapping[".set"] = DK_SET; + DirectiveKindMapping[".equ"] = DK_EQU; + DirectiveKindMapping[".equiv"] = DK_EQUIV; + DirectiveKindMapping[".ascii"] = DK_ASCII; + DirectiveKindMapping[".asciz"] = DK_ASCIZ; + DirectiveKindMapping[".string"] = DK_STRING; + DirectiveKindMapping[".byte"] = DK_BYTE; + DirectiveKindMapping[".short"] = DK_SHORT; + DirectiveKindMapping[".value"] = DK_VALUE; + DirectiveKindMapping[".2byte"] = DK_2BYTE; + DirectiveKindMapping[".long"] = DK_LONG; + DirectiveKindMapping[".int"] = DK_INT; + DirectiveKindMapping[".4byte"] = DK_4BYTE; + DirectiveKindMapping[".quad"] = DK_QUAD; + DirectiveKindMapping[".8byte"] = DK_8BYTE; + DirectiveKindMapping[".single"] = DK_SINGLE; + DirectiveKindMapping[".float"] = DK_FLOAT; + DirectiveKindMapping[".double"] = DK_DOUBLE; + DirectiveKindMapping[".align"] = DK_ALIGN; + DirectiveKindMapping[".align32"] = DK_ALIGN32; + DirectiveKindMapping[".balign"] = DK_BALIGN; + DirectiveKindMapping[".balignw"] = DK_BALIGNW; + DirectiveKindMapping[".balignl"] = DK_BALIGNL; + DirectiveKindMapping[".p2align"] = DK_P2ALIGN; + DirectiveKindMapping[".p2alignw"] = DK_P2ALIGNW; + DirectiveKindMapping[".p2alignl"] = DK_P2ALIGNL; + DirectiveKindMapping[".org"] = DK_ORG; + DirectiveKindMapping[".fill"] = DK_FILL; + DirectiveKindMapping[".space"] = DK_SPACE; + DirectiveKindMapping[".skip"] = DK_SKIP; + DirectiveKindMapping[".zero"] = DK_ZERO; + DirectiveKindMapping[".extern"] = 
DK_EXTERN;
+  DirectiveKindMapping[".globl"] = DK_GLOBL;
+  DirectiveKindMapping[".global"] = DK_GLOBAL;
+  DirectiveKindMapping[".indirect_symbol"] = DK_INDIRECT_SYMBOL;
+  DirectiveKindMapping[".lazy_reference"] = DK_LAZY_REFERENCE;
+  DirectiveKindMapping[".no_dead_strip"] = DK_NO_DEAD_STRIP;
+  DirectiveKindMapping[".symbol_resolver"] = DK_SYMBOL_RESOLVER;
+  DirectiveKindMapping[".private_extern"] = DK_PRIVATE_EXTERN;
+  DirectiveKindMapping[".reference"] = DK_REFERENCE;
+  DirectiveKindMapping[".weak_definition"] = DK_WEAK_DEFINITION;
+  DirectiveKindMapping[".weak_reference"] = DK_WEAK_REFERENCE;
+  DirectiveKindMapping[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN;
+  DirectiveKindMapping[".comm"] = DK_COMM;
+  DirectiveKindMapping[".common"] = DK_COMMON;
+  DirectiveKindMapping[".lcomm"] = DK_LCOMM;
+  DirectiveKindMapping[".abort"] = DK_ABORT;
+  DirectiveKindMapping[".include"] = DK_INCLUDE;
+  DirectiveKindMapping[".incbin"] = DK_INCBIN;
+  DirectiveKindMapping[".code16"] = DK_CODE16;
+  DirectiveKindMapping[".code16gcc"] = DK_CODE16GCC;
+  DirectiveKindMapping[".rept"] = DK_REPT;
+  DirectiveKindMapping[".irp"] = DK_IRP;
+  DirectiveKindMapping[".irpc"] = DK_IRPC;
+  DirectiveKindMapping[".endr"] = DK_ENDR;
+  DirectiveKindMapping[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE;
+  DirectiveKindMapping[".bundle_lock"] = DK_BUNDLE_LOCK;
+  DirectiveKindMapping[".bundle_unlock"] = DK_BUNDLE_UNLOCK;
+  DirectiveKindMapping[".if"] = DK_IF;
+  DirectiveKindMapping[".ifb"] = DK_IFB;
+  DirectiveKindMapping[".ifnb"] = DK_IFNB;
+  DirectiveKindMapping[".ifc"] = DK_IFC;
+  DirectiveKindMapping[".ifnc"] = DK_IFNC;
+  DirectiveKindMapping[".ifdef"] = DK_IFDEF;
+  DirectiveKindMapping[".ifndef"] = DK_IFNDEF;
+  DirectiveKindMapping[".ifnotdef"] = DK_IFNOTDEF;
+  DirectiveKindMapping[".elseif"] = DK_ELSEIF;
+  DirectiveKindMapping[".else"] = DK_ELSE;
+  DirectiveKindMapping[".endif"] = DK_ENDIF;
 }
 
 /// ParseDirectiveFile
-- 
cgit v1.1


From 805141e5043b7bd2fade62102c79b2beb2e3aec4 Mon Sep 17 00:00:00 2001
From: Jakub Staszak
Date: Thu, 10 Jan 2013 23:43:56 +0000
Subject: Remove heavy and unused #includes from X86TargetObjectFile.cpp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172151 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86TargetObjectFile.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index b8ee319..871dacd 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,16 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86TargetObjectFile.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Target/Mangler.h"
+
 using namespace llvm;
 using namespace dwarf;
-- 
cgit v1.1


From 51873db5af7267e757cd1c65cae43df48588bccf Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Fri, 11 Jan 2013 00:37:35 +0000
Subject: llvm/lib/MC/MCParser/AsmParser.cpp: [ms-inline-asm] Fix a couple of
 undefined behaviors. Operand->needAddressOf() is not initialized when
 !Operand->isReg().
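
The underlying problem is the usual uninitialized-member read (a generic sketch, not code from the patch):

    // Sketch: AddressOf is assigned only on some creation paths, so calling
    // needAddressOf() on an operand built elsewhere reads an indeterminate
    // bool -- undefined behavior.
    struct Op {
      bool AddressOf;                              // never set by createImm()
      static Op *createImm() { return new Op; }    // default-init: AddressOf is garbage
      bool needAddressOf() const { return AddressOf; }
    };

The fix below therefore guards the call as Operand->isReg() && Operand->needAddressOf(), so the flag is only read on the path that actually sets it.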
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172153 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index ecf35ff..00c16e1 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3934,7 +3934,8 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, std::string Constraint = "="; ++InputIdx; OutputDecls.push_back(OpDecl); - OutputDeclsAddressOf.push_back(Operand->needAddressOf()); + OutputDeclsAddressOf.push_back(Operand->isReg() && + Operand->needAddressOf()); Constraint += Operand->getConstraint().str(); OutputConstraints.push_back(Constraint); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, @@ -3942,7 +3943,8 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, Operand->getNameLen())); } else { InputDecls.push_back(OpDecl); - InputDeclsAddressOf.push_back(Operand->needAddressOf()); + InputDeclsAddressOf.push_back(Operand->isReg() && + Operand->needAddressOf()); InputConstraints.push_back(Operand->getConstraint().str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(), -- cgit v1.1 From b789b949b60c9a28686e638f75b2640d16d7144e Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Fri, 11 Jan 2013 01:13:54 +0000 Subject: X86AsmParser.cpp: Fix up r172148, to add initializer in another CreateMem(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172157 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8545a56..5ce258e 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -528,6 +528,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = Scale; Res->Mem.Size = Size; Res->Mem.NeedSizeDir = NeedSizeDir; + Res->AddressOf = false; return Res; } }; -- cgit v1.1 From 4d0b4a45dc724666d5de4f2f7c2d295487e4ca3e Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 11 Jan 2013 01:45:05 +0000 Subject: Support for half intrinsics. Pushes MMX into slower encoding path. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172159 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Function.cpp | 39 ++++++++++++++++++++++----------------- lib/IR/Verifier.cpp | 1 + 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 317017a..cd35aff 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -372,27 +372,28 @@ enum IIT_Info { IIT_I16 = 3, IIT_I32 = 4, IIT_I64 = 5, - IIT_F32 = 6, - IIT_F64 = 7, - IIT_V2 = 8, - IIT_V4 = 9, - IIT_V8 = 10, - IIT_V16 = 11, - IIT_V32 = 12, - IIT_MMX = 13, + IIT_F16 = 6, + IIT_F32 = 7, + IIT_F64 = 8, + IIT_V2 = 9, + IIT_V4 = 10, + IIT_V8 = 11, + IIT_V16 = 12, + IIT_V32 = 13, IIT_PTR = 14, IIT_ARG = 15, // Values from 16+ are only encodable with the inefficient encoding. 
- IIT_METADATA = 16, - IIT_EMPTYSTRUCT = 17, - IIT_STRUCT2 = 18, - IIT_STRUCT3 = 19, - IIT_STRUCT4 = 20, - IIT_STRUCT5 = 21, - IIT_EXTEND_VEC_ARG = 22, - IIT_TRUNC_VEC_ARG = 23, - IIT_ANYPTR = 24 + IIT_MMX = 16, + IIT_METADATA = 17, + IIT_EMPTYSTRUCT = 18, + IIT_STRUCT2 = 19, + IIT_STRUCT3 = 20, + IIT_STRUCT4 = 21, + IIT_STRUCT5 = 22, + IIT_EXTEND_VEC_ARG = 23, + IIT_TRUNC_VEC_ARG = 24, + IIT_ANYPTR = 25 }; @@ -412,6 +413,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_METADATA: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0)); return; + case IIT_F16: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Half, 0)); + return; case IIT_F32: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0)); return; @@ -546,6 +550,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, case IITDescriptor::Void: return Type::getVoidTy(Context); case IITDescriptor::MMX: return Type::getX86_MMXTy(Context); case IITDescriptor::Metadata: return Type::getMetadataTy(Context); + case IITDescriptor::Half: return Type::getHalfTy(Context); case IITDescriptor::Float: return Type::getFloatTy(Context); case IITDescriptor::Double: return Type::getDoubleTy(Context); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index ee18f77..4252764 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -1800,6 +1800,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, case IITDescriptor::Void: return !Ty->isVoidTy(); case IITDescriptor::MMX: return !Ty->isX86_MMXTy(); case IITDescriptor::Metadata: return !Ty->isMetadataTy(); + case IITDescriptor::Half: return !Ty->isHalfTy(); case IITDescriptor::Float: return !Ty->isFloatTy(); case IITDescriptor::Double: return !Ty->isDoubleTy(); case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width); -- cgit v1.1 From 9b1e854698d036cae6ab1d6576f709bec6fce082 Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Fri, 11 Jan 2013 02:37:55 +0000 Subject: SMDiagnostic: don't emit ranges if there are /any/ multibyte characters. Right now, only OS X has a way to determine the column width of a string (PR14910). Until we have a good way to deal with this, we just won't print carets, source ranges, or fixits for SMDiagnostic if the source line has multibyte characters in it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/SourceMgr.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 58a7713..fa82265 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -336,6 +336,10 @@ static void printSourceLine(raw_ostream &S, StringRef LineContents) { S << '\n'; } +static bool isNonASCII(char c) { + return c & 0x80; +} + void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors) const { // Display colors only if OS supports colors. @@ -392,18 +396,17 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, if (LineNo == -1 || ColumnNo == -1) return; - // FIXME: If there are multibyte characters in the source, all our ranges will - // be wrong. To do this properly, we'll need a byte-to-column map like Clang's - // TextDiagnostic. For now, we'll just handle tabs by expanding them later, - // and bail out rather than show incorrect ranges and misaligned fixits for - // any other odd characters. 
- SmallString<128> PrintableLine(LineContents); - std::replace(PrintableLine.begin(), PrintableLine.end(), '\t', ' '); - size_t NumColumns = (size_t)llvm::sys::locale::columnWidth(PrintableLine); - if (NumColumns != PrintableLine.size()) { + // FIXME: If there are multibyte or multi-column characters in the source, all + // our ranges will be wrong. To do this properly, we'll need a byte-to-column + // map like Clang's TextDiagnostic. For now, we'll just handle tabs by + // expanding them later, and bail out rather than show incorrect ranges and + // misaligned fixits for any other odd characters. + if (std::find_if(LineContents.begin(), LineContents.end(), isNonASCII) != + LineContents.end()) { printSourceLine(S, LineContents); return; } + size_t NumColumns = LineContents.size(); // Build the line with the caret and ranges. std::string CaretLine(NumColumns+1, ' '); -- cgit v1.1 From b956ec176a23dff2324c4938c3433c5e5ce2eae5 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Fri, 11 Jan 2013 02:50:09 +0000 Subject: Revert r172153, "llvm/lib/MC/MCParser/AsmParser.cpp: [ms-inline-asm] Fix a couple of undefined behaviors. Operand->needAddressOf() is not initialized when !Operand->isReg()." It has been redundant since r172157. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172166 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 00c16e1..ecf35ff 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3934,8 +3934,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, std::string Constraint = "="; ++InputIdx; OutputDecls.push_back(OpDecl); - OutputDeclsAddressOf.push_back(Operand->isReg() && - Operand->needAddressOf()); + OutputDeclsAddressOf.push_back(Operand->needAddressOf()); Constraint += Operand->getConstraint().str(); OutputConstraints.push_back(Constraint); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, @@ -3943,8 +3942,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, Operand->getNameLen())); } else { InputDecls.push_back(OpDecl); - InputDeclsAddressOf.push_back(Operand->isReg() && - Operand->needAddressOf()); + InputDeclsAddressOf.push_back(Operand->needAddressOf()); InputConstraints.push_back(Operand->getConstraint().str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(), -- cgit v1.1 From a675c74208f77351ea7fa3eed4f542ae781ab566 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 11 Jan 2013 07:11:59 +0000 Subject: ARM Cost Model: We need to detect the max bitwidth of types in the loop in order to select the max vectorization factor. We don't have a detailed analysis on which values are vectorized and which stay scalars in the vectorized loop, so we use another method: we look at reduction variables, loads, and stores, which are the only ways to get information in and out of loop iterations. If the data types are extended or truncated, the cost model will catch the cost of the vector zext/sext/trunc operations.
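As a worked example of the heuristic (an assumed input loop, not taken from this patch): in the function below, the only values that cross iteration boundaries are the i8 loads and the i32 reduction variable, so the widest type seen by the analysis is 32 bits, while the sign-extension feeding the add is priced separately as a vector sext.

    // The analysis would inspect the i8 load (memory access) and the
    // i32 phi (reduction variable), and ignore everything else.
    int sumBytes(const signed char *A, int N) {
      int Sum = 0;               // i32 reduction variable
      for (int I = 0; I < N; ++I)
        Sum += A[I];             // i8 load, sign-extended for the add
      return Sum;
    }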
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 4803a9d..4bb8c43 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2635,7 +2635,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements" " into one vector."); - + unsigned VF = MaxVectorSize; // If we optimize the program for size, avoid creating the tail loop. @@ -2697,17 +2697,23 @@ unsigned LoopVectorizationCostModel::getWidestType() { // For each instruction in the loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - if (Legal->isUniformAfterVectorization(it)) + Type *T = it->getType(); + + // Only examine Loads, Stores and PHINodes. + if (!isa(it) && !isa(it) && !isa(it)) continue; - Type *T = it->getType(); + // Examine PHI nodes that are reduction variables. + if (PHINode *PN = dyn_cast(it)) + if (!Legal->getReductionVars()->count(PN)) + continue; + // Examine the stored values. if (StoreInst *ST = dyn_cast(it)) T = ST->getValueOperand()->getType(); - // PHINodes and pointers are difficult to analyze, but we catch all other - // uses of the types in other instructions. - if (isa(it) || T->isPointerTy() || T->isVoidTy()) + // Ignore stored/loaded pointer types. + if (T->isPointerTy()) continue; MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); -- cgit v1.1 From 5f2801bd653f82927ddd7612cb149e78a8509ed4 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 11 Jan 2013 10:36:13 +0000 Subject: Simplify writing floating types to assembly. This removes previous special cases for each floating-point type in favour of a shared codepath. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 106 ++++++++++------------------------ 1 file changed, 32 insertions(+), 74 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 32df0e2..d4a745d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1746,90 +1746,48 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS, static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { - if (CFP->getType()->isHalfTy()) { - if (AP.isVerbose()) { - SmallString<10> Str; - CFP->getValueAPF().toString(Str); - AP.OutStreamer.GetCommentOS() << "half " << Str << '\n'; - } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace); - return; - } - - if (CFP->getType()->isFloatTy()) { - if (AP.isVerbose()) { - float Val = CFP->getValueAPF().convertToFloat(); - uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' - << " (" << format("0x%x", IntVal) << ")\n"; - } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); - return; - } + APInt API = CFP->getValueAPF().bitcastToAPInt(); - // FP Constants are printed as integer constants to avoid losing - // precision. 
- if (CFP->getType()->isDoubleTy()) { - if (AP.isVerbose()) { - double Val = CFP->getValueAPF().convertToDouble(); - uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' - << " (" << format("0x%lx", IntVal) << ")\n"; - } + // First print a comment with what we think the original floating-point value + // should have been. + if (AP.isVerbose()) { + SmallString<8> StrVal; + CFP->getValueAPF().toString(StrVal); - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); - return; + CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; } - if (CFP->getType()->isX86_FP80Ty() || CFP->getType()->isFP128Ty()) { - // all long double variants are printed as hex - // API needed to prevent premature destruction - APInt API = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t *p = API.getRawData(); - if (AP.isVerbose()) { - // Convert to double so we can print the approximate val as a comment. - SmallString<8> StrVal; - CFP->getValueAPF().toString(StrVal); + // Now iterate through the APInt chunks, emitting them in endian-correct + // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit + // floats). + unsigned NumBytes = API.getBitWidth() / 8; + unsigned TrailingBytes = NumBytes % sizeof(uint64_t); + const uint64_t *p = API.getRawData(); - const char *TyNote = CFP->getType()->isFP128Ty() ? "fp128 " : "x86_fp80 "; - AP.OutStreamer.GetCommentOS() << TyNote << StrVal << '\n'; - } + // PPC's long double has odd notions of endianness compared to how LLVM + // handles it: p[0] goes first for *big* endian on PPC. + if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) { + int Chunk = API.getNumWords() - 1; - // The 80-bit type is made of a 64-bit and 16-bit value, the 128-bit has 2 - // 64-bit words. - uint32_t TrailingSize = CFP->getType()->isFP128Ty() ? 8 : 2; + if (TrailingBytes) + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); - if (AP.TM.getDataLayout()->isBigEndian()) { - AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace); - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - } else { - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - AP.OutStreamer.EmitIntValue(p[1], TrailingSize, AddrSpace); - } + for (; Chunk >= 0; --Chunk) + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + } else { + unsigned Chunk; + for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); - // Emit the tail padding for the long double. - const DataLayout &TD = *AP.TM.getDataLayout(); - AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType()), AddrSpace); - return; + if (TrailingBytes) + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); } - assert(CFP->getType()->isPPC_FP128Ty() && - "Floating point constant type not handled"); - // All long double variants are printed as hex - // API needed to prevent premature destruction. 
- APInt API = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t *p = API.getRawData(); - if (AP.TM.getDataLayout()->isBigEndian()) { - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); - } else { - AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - } + // Emit the tail padding for the long double. + const DataLayout &TD = *AP.TM.getDataLayout(); + AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - + TD.getTypeStoreSize(CFP->getType()), AddrSpace); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, -- cgit v1.1 From dced3cdb0408f0802db332453a1e9c69c5fea70c Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 11 Jan 2013 16:33:30 +0000 Subject: Fix bug in exception table allocation (PR13678) Patch by Michael Muller. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172214 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/JIT/JITEmitter.cpp | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 6fd4df4..c273876 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -969,14 +969,24 @@ bool JITEmitter::finishFunction(MachineFunction &F) { SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - - BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), - ActualSize); - BufferEnd = BufferBegin+ActualSize; - EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; - uint8_t *EhStart; - uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, - EhStart); + uint8_t *FrameRegister; + + while (true) { + BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), + ActualSize); + BufferEnd = BufferBegin+ActualSize; + EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; + uint8_t *EhStart; + FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart); + + // If the buffer was large enough to hold the table then we are done. + if (CurBufferPtr != BufferEnd) + break; + + // Try again with twice as much space. + ActualSize = (CurBufferPtr - BufferBegin) * 2; + MemMgr->deallocateExceptionTable(BufferBegin); + } MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); BufferBegin = SavedBufferBegin; -- cgit v1.1 From a125cacf7d154d0e5cad47f011e619e45517c839 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Jan 2013 17:28:14 +0000 Subject: Added -view-callgraph module pass. -dot-callgraph similarly follows a standard module pass pattern. Patch by Speziale Ettore! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172220 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/CallPrinter.cpp | 87 ++++++++++++++++++++++++++++++++++++++++ lib/Analysis/IPA/IPA.cpp | 2 + 2 files changed, 89 insertions(+) create mode 100644 lib/Analysis/IPA/CallPrinter.cpp (limited to 'lib') diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp new file mode 100644 index 0000000..306ae7a --- /dev/null +++ b/lib/Analysis/IPA/CallPrinter.cpp @@ -0,0 +1,87 @@ +//===- CallPrinter.cpp - DOT printer for call graph -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-callgraph', which emits a callgraph..dot +// containing the call graph of a module. +// +// There is also a pass available to directly call dotty ('-view-callgraph'). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" + +using namespace llvm; + +namespace llvm { + +template<> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(CallGraph *Graph) { + return "Call graph"; + } + + std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { + if (Function *Func = Node->getFunction()) + return Func->getName(); + + return "external node"; + } +}; + +} // end llvm namespace + +namespace { + +struct CallGraphViewer + : public DOTGraphTraitsModuleViewer { + static char ID; + + CallGraphViewer() + : DOTGraphTraitsModuleViewer("callgraph", ID) { + initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct CallGraphPrinter + : public DOTGraphTraitsModulePrinter { + static char ID; + + CallGraphPrinter() + : DOTGraphTraitsModulePrinter("callgraph", ID) { + initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} // end anonymous namespace + +char CallGraphViewer::ID = 0; +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", + "View call graph", + false, false) + +char CallGraphPrinter::ID = 0; +INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", + "Print call graph to 'dot' file", + false, false) + +// Create methods available outside of this file, to use them in +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +ModulePass *llvm::createCallGraphViewerPass() { + return new CallGraphViewer(); +} + +ModulePass *llvm::createCallGraphPrinterPass() { + return new CallGraphPrinter(); +} diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp index 0ba2e04..aa5164e 100644 --- a/lib/Analysis/IPA/IPA.cpp +++ b/lib/Analysis/IPA/IPA.cpp @@ -20,6 +20,8 @@ using namespace llvm; void llvm::initializeIPA(PassRegistry &Registry) { initializeBasicCallGraphPass(Registry); initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); initializeGlobalsModRefPass(Registry); } -- cgit v1.1 From 74c1be27a42c2f83e5be23062ea1e6f9ebf18d20 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Jan 2013 17:34:05 +0000 Subject: Update CMakeLists for CallPrinter.cpp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172222 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 34d6d1b..318119b 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMipa CallGraph.cpp CallGraphSCCPass.cpp + CallPrinter.cpp FindUsedTypes.cpp GlobalsModRef.cpp IPA.cpp -- cgit v1.1 From e9ccacd376b2a271434e739f0b7d468cc691723b Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Jan 2013 17:46:50 +0000 Subject: Fix typo from r170452. Affects -enable-misched heuristics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172223 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a32df78..b965c7f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1533,8 +1533,8 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &BotCand) { // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(TopCand.Policy); - Top.setLatencyPolicy(BotCand.Policy); + Bot.setLatencyPolicy(BopCand.Policy); + Top.setLatencyPolicy(TotCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited -- cgit v1.1 From eed4e0193ff04ba27bfb6d0d4201505f03d99a7c Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Jan 2013 17:51:16 +0000 Subject: Follow-up typo correction from building the wrong branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172224 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index b965c7f..c949266 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1533,8 +1533,8 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &BotCand) { // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BopCand.Policy); - Top.setLatencyPolicy(TotCand.Policy); + Bot.setLatencyPolicy(BotCand.Policy); + Top.setLatencyPolicy(TopCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited -- cgit v1.1 From fffe3634933471ee9805412ffa221080c9e9e8fd Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 11 Jan 2013 18:12:39 +0000 Subject: For inline asm: - recognize string "{memory}" in the MI generation - mark as mayload/maystore when there's a memory clobber constraint. PR14859. 
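For reference, the classic source-level construct this affects is the compiler barrier (an illustrative example, not taken from this patch). With this change the resulting INLINEASM node is marked both mayload and maystore, so memory accesses cannot be reordered or deleted across it:

    // GNU-style compiler barrier: emits no instructions, but the "memory"
    // clobber must be treated as if the asm both reads and writes memory.
    static inline void compiler_barrier() {
      asm volatile("" ::: "memory");
    }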
Patch by Krzysztof Parzyszek git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 6 +++++- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 ++++++ lib/CodeGen/SelectionDAG/TargetLowering.cpp | 10 +++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index df82a17..8f7c5fd 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1476,10 +1476,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " "; getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); - // Print HasSideEffects, IsAlignStack + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); if (ExtraInfo & InlineAsm::Extra_HasSideEffects) OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_MayLoad) + OS << " [mayload]"; + if (ExtraInfo & InlineAsm::Extra_MayStore) + OS << " [maystore]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; if (getInlineAsmDialect() == InlineAsm::AD_ATT) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 599ef2c..4e07fd3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5948,6 +5948,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) + continue; + // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -6051,6 +6055,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) ExtraInfo |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d2da9b7..35a11b4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3068,7 +3068,9 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetLowering::ConstraintType TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { + unsigned S = Constraint.size(); + + if (S == 1) { switch (Constraint[0]) { default: break; case 'r': return C_RegisterClass; @@ -3097,9 +3099,11 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { } } - if (Constraint.size() > 1 && Constraint[0] == '{' && - Constraint[Constraint.size()-1] == '}') + if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { + if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}" + return C_Memory; return C_Register; + } return C_Unknown; } -- cgit v1.1 From 3e40d927a775994d8f4c2d30695be69c248fa16c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 11 Jan 2013 19:54:13 +0000 Subject: ARM Cost Model: Modify the target independent cost model to ask the target if it supports the different CAST types. 
We didn't do this on X86 because of the different register sizes and types, but on ARM this makes sense. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172245 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BasicTargetTransformInfo.cpp | 33 ++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 3892cc4..59192f4 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -241,6 +241,27 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, std::pair SrcLT = TLI->getTypeLegalizationCost(Src); std::pair DstLT = TLI->getTypeLegalizationCost(Dst); + // Check for NOOP conversions. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + + // Bitcast between types that are legalized to the same type are free. + if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) + return 0; + } + + if (Opcode == Instruction::Trunc && + TLI->isTruncateFree(SrcLT.second, DstLT.second)) + return 0; + + if (Opcode == Instruction::ZExt && + TLI->isZExtFree(SrcLT.second, DstLT.second)) + return 0; + + // If the cast is marked as legal (or promote) then assume low cost. + if (TLI->isOperationLegalOrPromote(ISD, DstLT.second)) + return 1; + // Handle scalar conversions. if (!Src->isVectorTy() && !Dst->isVectorTy()) { @@ -248,14 +269,6 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, if (Opcode == Instruction::BitCast) return 0; - if (Opcode == Instruction::Trunc && - TLI->isTruncateFree(SrcLT.second, DstLT.second)) - return 0; - - if (Opcode == Instruction::ZExt && - TLI->isZExtFree(SrcLT.second, DstLT.second)) - return 0; - // Just check the op cost. If the operation is legal then assume it costs 1. if (!TLI->isOperationExpand(ISD, DstLT.second)) return 1; @@ -271,10 +284,6 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, if (SrcLT.first == DstLT.first && SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - - // Assume that Zext is done using AND. if (Opcode == Instruction::ZExt) return 1; -- cgit v1.1 From 69e42dbd006c0afb732067ece7327988b1e24c01 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 11 Jan 2013 20:05:37 +0000 Subject: Split TargetLowering into a CodeGen and a SelectionDAG part. This fixes some of the cycles between libCodeGen and libSelectionDAG. It's still a complete mess, but as long as the edges consist of virtual calls, it doesn't cause breakage. BasicTTI did static calls and thus broke some build configurations.
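The shape of the split, as a rough sketch (member lists abbreviated and illustrative; the real interfaces live in include/llvm/Target/TargetLowering.h): everything that does not touch SelectionDAG moves into a base class that libCodeGen can use directly, while the DAG-aware hooks stay in a derived class reached only through virtual calls.

    class TargetLoweringBase {
    public:
      // Legalization tables, libcall names, and type queries live here,
      // with no SelectionDAG types involved, so CodeGen passes can hold a
      // const TargetLoweringBase* without pulling in libSelectionDAG.
      virtual ~TargetLoweringBase();
    };

    class TargetLowering : public TargetLoweringBase {
    public:
      // DAG-specific lowering hooks; only SelectionDAG clients call these.
      virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
    };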
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172246 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BasicTargetTransformInfo.cpp | 8 +- lib/CodeGen/CMakeLists.txt | 15 +- lib/CodeGen/DwarfEHPrepare.cpp | 2 +- lib/CodeGen/IfConversion.cpp | 2 +- lib/CodeGen/MachineBlockPlacement.cpp | 2 +- lib/CodeGen/MachineLICM.cpp | 2 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 976 +------------------- lib/CodeGen/SjLjEHPrepare.cpp | 6 +- lib/CodeGen/StackProtector.cpp | 6 +- lib/CodeGen/TargetLoweringBase.cpp | 1274 +++++++++++++++++++++++++++ 10 files changed, 1301 insertions(+), 992 deletions(-) create mode 100644 lib/CodeGen/TargetLoweringBase.cpp (limited to 'lib') diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 59192f4..ea5e937 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. @@ -37,7 +37,7 @@ public: llvm_unreachable("This pass cannot be directly constructed"); } - BasicTTI(const TargetLowering *TLI) : ImmutablePass(ID), TLI(TLI) { + BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } @@ -112,7 +112,7 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", char BasicTTI::ID = 0; ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetLowering *TLI) { +llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { return new BasicTTI(TLI); } @@ -128,7 +128,7 @@ bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const { - TargetLowering::AddrMode AM; + TargetLoweringBase::AddrMode AM; AM.BaseGV = BaseGV; AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d5f3932..ddc7ada 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,8 +9,8 @@ add_llvm_library(LLVMCodeGen CodeGen.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp - DeadMachineInstructionElim.cpp DFAPacketizer.cpp + DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp @@ -32,21 +32,20 @@ add_llvm_library(LLVMCodeGen LiveInterval.cpp LiveIntervalAnalysis.cpp LiveIntervalUnion.cpp + LiveRangeCalc.cpp + LiveRangeEdit.cpp LiveRegMatrix.cpp LiveStackAnalysis.cpp LiveVariables.cpp - LiveRangeCalc.cpp - LiveRangeEdit.cpp LocalStackSlotAllocation.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCSE.cpp MachineCodeEmitter.cpp MachineCopyPropagation.cpp - MachineCSE.cpp MachineDominators.cpp - MachinePostDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp MachineFunctionPass.cpp @@ -58,6 +57,7 @@ add_llvm_library(LLVMCodeGen MachineModuleInfo.cpp MachineModuleInfoImpls.cpp MachinePassRegistry.cpp + MachinePostDominators.cpp MachineRegisterInfo.cpp MachineSSAUpdater.cpp MachineScheduler.cpp @@ -91,16 +91,17 @@ add_llvm_library(LLVMCodeGen ShrinkWrapping.cpp SjLjEHPrepare.cpp SlotIndexes.cpp - Spiller.cpp SpillPlacement.cpp + Spiller.cpp 
SplitKit.cpp + StackColoring.cpp StackProtector.cpp StackSlotColoring.cpp - StackColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp TargetInstrInfo.cpp + TargetLoweringBase.cpp TargetLoweringObjectFileImpl.cpp TargetOptionsImpl.cpp TargetRegisterInfo.cpp diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 4cafa96..f27ec77 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -33,7 +33,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 8906991..3583a9b 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -151,7 +151,7 @@ namespace { /// basic block number. std::vector BBAnalysis; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 07a3e03..3b09c6b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -171,7 +171,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const TargetInstrInfo *TII; /// \brief A handle to the target's lowering info. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// \brief Allocator and owner of BlockChain structures. /// diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 760cf8a..ed3ed4d 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -62,7 +62,7 @@ namespace { class MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 35a11b4..2e248e9 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -33,324 +33,6 @@ #include using namespace llvm; -/// InitLibcallNames - Set default libcall names. 
-/// -static void InitLibcallNames(const char **Names) { - Names[RTLIB::SHL_I16] = "__ashlhi3"; - Names[RTLIB::SHL_I32] = "__ashlsi3"; - Names[RTLIB::SHL_I64] = "__ashldi3"; - Names[RTLIB::SHL_I128] = "__ashlti3"; - Names[RTLIB::SRL_I16] = "__lshrhi3"; - Names[RTLIB::SRL_I32] = "__lshrsi3"; - Names[RTLIB::SRL_I64] = "__lshrdi3"; - Names[RTLIB::SRL_I128] = "__lshrti3"; - Names[RTLIB::SRA_I16] = "__ashrhi3"; - Names[RTLIB::SRA_I32] = "__ashrsi3"; - Names[RTLIB::SRA_I64] = "__ashrdi3"; - Names[RTLIB::SRA_I128] = "__ashrti3"; - Names[RTLIB::MUL_I8] = "__mulqi3"; - Names[RTLIB::MUL_I16] = "__mulhi3"; - Names[RTLIB::MUL_I32] = "__mulsi3"; - Names[RTLIB::MUL_I64] = "__muldi3"; - Names[RTLIB::MUL_I128] = "__multi3"; - Names[RTLIB::MULO_I32] = "__mulosi4"; - Names[RTLIB::MULO_I64] = "__mulodi4"; - Names[RTLIB::MULO_I128] = "__muloti4"; - Names[RTLIB::SDIV_I8] = "__divqi3"; - Names[RTLIB::SDIV_I16] = "__divhi3"; - Names[RTLIB::SDIV_I32] = "__divsi3"; - Names[RTLIB::SDIV_I64] = "__divdi3"; - Names[RTLIB::SDIV_I128] = "__divti3"; - Names[RTLIB::UDIV_I8] = "__udivqi3"; - Names[RTLIB::UDIV_I16] = "__udivhi3"; - Names[RTLIB::UDIV_I32] = "__udivsi3"; - Names[RTLIB::UDIV_I64] = "__udivdi3"; - Names[RTLIB::UDIV_I128] = "__udivti3"; - Names[RTLIB::SREM_I8] = "__modqi3"; - Names[RTLIB::SREM_I16] = "__modhi3"; - Names[RTLIB::SREM_I32] = "__modsi3"; - Names[RTLIB::SREM_I64] = "__moddi3"; - Names[RTLIB::SREM_I128] = "__modti3"; - Names[RTLIB::UREM_I8] = "__umodqi3"; - Names[RTLIB::UREM_I16] = "__umodhi3"; - Names[RTLIB::UREM_I32] = "__umodsi3"; - Names[RTLIB::UREM_I64] = "__umoddi3"; - Names[RTLIB::UREM_I128] = "__umodti3"; - - // These are generally not available. - Names[RTLIB::SDIVREM_I8] = 0; - Names[RTLIB::SDIVREM_I16] = 0; - Names[RTLIB::SDIVREM_I32] = 0; - Names[RTLIB::SDIVREM_I64] = 0; - Names[RTLIB::SDIVREM_I128] = 0; - Names[RTLIB::UDIVREM_I8] = 0; - Names[RTLIB::UDIVREM_I16] = 0; - Names[RTLIB::UDIVREM_I32] = 0; - Names[RTLIB::UDIVREM_I64] = 0; - Names[RTLIB::UDIVREM_I128] = 0; - - Names[RTLIB::NEG_I32] = "__negsi2"; - Names[RTLIB::NEG_I64] = "__negdi2"; - Names[RTLIB::ADD_F32] = "__addsf3"; - Names[RTLIB::ADD_F64] = "__adddf3"; - Names[RTLIB::ADD_F80] = "__addxf3"; - Names[RTLIB::ADD_F128] = "__addtf3"; - Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; - Names[RTLIB::SUB_F32] = "__subsf3"; - Names[RTLIB::SUB_F64] = "__subdf3"; - Names[RTLIB::SUB_F80] = "__subxf3"; - Names[RTLIB::SUB_F128] = "__subtf3"; - Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; - Names[RTLIB::MUL_F32] = "__mulsf3"; - Names[RTLIB::MUL_F64] = "__muldf3"; - Names[RTLIB::MUL_F80] = "__mulxf3"; - Names[RTLIB::MUL_F128] = "__multf3"; - Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; - Names[RTLIB::DIV_F32] = "__divsf3"; - Names[RTLIB::DIV_F64] = "__divdf3"; - Names[RTLIB::DIV_F80] = "__divxf3"; - Names[RTLIB::DIV_F128] = "__divtf3"; - Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; - Names[RTLIB::REM_F32] = "fmodf"; - Names[RTLIB::REM_F64] = "fmod"; - Names[RTLIB::REM_F80] = "fmodl"; - Names[RTLIB::REM_F128] = "fmodl"; - Names[RTLIB::REM_PPCF128] = "fmodl"; - Names[RTLIB::FMA_F32] = "fmaf"; - Names[RTLIB::FMA_F64] = "fma"; - Names[RTLIB::FMA_F80] = "fmal"; - Names[RTLIB::FMA_F128] = "fmal"; - Names[RTLIB::FMA_PPCF128] = "fmal"; - Names[RTLIB::POWI_F32] = "__powisf2"; - Names[RTLIB::POWI_F64] = "__powidf2"; - Names[RTLIB::POWI_F80] = "__powixf2"; - Names[RTLIB::POWI_F128] = "__powitf2"; - Names[RTLIB::POWI_PPCF128] = "__powitf2"; - Names[RTLIB::SQRT_F32] = "sqrtf"; - Names[RTLIB::SQRT_F64] = "sqrt"; - Names[RTLIB::SQRT_F80] = "sqrtl"; - 
Names[RTLIB::SQRT_F128] = "sqrtl"; - Names[RTLIB::SQRT_PPCF128] = "sqrtl"; - Names[RTLIB::LOG_F32] = "logf"; - Names[RTLIB::LOG_F64] = "log"; - Names[RTLIB::LOG_F80] = "logl"; - Names[RTLIB::LOG_F128] = "logl"; - Names[RTLIB::LOG_PPCF128] = "logl"; - Names[RTLIB::LOG2_F32] = "log2f"; - Names[RTLIB::LOG2_F64] = "log2"; - Names[RTLIB::LOG2_F80] = "log2l"; - Names[RTLIB::LOG2_F128] = "log2l"; - Names[RTLIB::LOG2_PPCF128] = "log2l"; - Names[RTLIB::LOG10_F32] = "log10f"; - Names[RTLIB::LOG10_F64] = "log10"; - Names[RTLIB::LOG10_F80] = "log10l"; - Names[RTLIB::LOG10_F128] = "log10l"; - Names[RTLIB::LOG10_PPCF128] = "log10l"; - Names[RTLIB::EXP_F32] = "expf"; - Names[RTLIB::EXP_F64] = "exp"; - Names[RTLIB::EXP_F80] = "expl"; - Names[RTLIB::EXP_F128] = "expl"; - Names[RTLIB::EXP_PPCF128] = "expl"; - Names[RTLIB::EXP2_F32] = "exp2f"; - Names[RTLIB::EXP2_F64] = "exp2"; - Names[RTLIB::EXP2_F80] = "exp2l"; - Names[RTLIB::EXP2_F128] = "exp2l"; - Names[RTLIB::EXP2_PPCF128] = "exp2l"; - Names[RTLIB::SIN_F32] = "sinf"; - Names[RTLIB::SIN_F64] = "sin"; - Names[RTLIB::SIN_F80] = "sinl"; - Names[RTLIB::SIN_F128] = "sinl"; - Names[RTLIB::SIN_PPCF128] = "sinl"; - Names[RTLIB::COS_F32] = "cosf"; - Names[RTLIB::COS_F64] = "cos"; - Names[RTLIB::COS_F80] = "cosl"; - Names[RTLIB::COS_F128] = "cosl"; - Names[RTLIB::COS_PPCF128] = "cosl"; - Names[RTLIB::POW_F32] = "powf"; - Names[RTLIB::POW_F64] = "pow"; - Names[RTLIB::POW_F80] = "powl"; - Names[RTLIB::POW_F128] = "powl"; - Names[RTLIB::POW_PPCF128] = "powl"; - Names[RTLIB::CEIL_F32] = "ceilf"; - Names[RTLIB::CEIL_F64] = "ceil"; - Names[RTLIB::CEIL_F80] = "ceill"; - Names[RTLIB::CEIL_F128] = "ceill"; - Names[RTLIB::CEIL_PPCF128] = "ceill"; - Names[RTLIB::TRUNC_F32] = "truncf"; - Names[RTLIB::TRUNC_F64] = "trunc"; - Names[RTLIB::TRUNC_F80] = "truncl"; - Names[RTLIB::TRUNC_F128] = "truncl"; - Names[RTLIB::TRUNC_PPCF128] = "truncl"; - Names[RTLIB::RINT_F32] = "rintf"; - Names[RTLIB::RINT_F64] = "rint"; - Names[RTLIB::RINT_F80] = "rintl"; - Names[RTLIB::RINT_F128] = "rintl"; - Names[RTLIB::RINT_PPCF128] = "rintl"; - Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; - Names[RTLIB::NEARBYINT_F64] = "nearbyint"; - Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; - Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; - Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; - Names[RTLIB::FLOOR_F32] = "floorf"; - Names[RTLIB::FLOOR_F64] = "floor"; - Names[RTLIB::FLOOR_F80] = "floorl"; - Names[RTLIB::FLOOR_F128] = "floorl"; - Names[RTLIB::FLOOR_PPCF128] = "floorl"; - Names[RTLIB::COPYSIGN_F32] = "copysignf"; - Names[RTLIB::COPYSIGN_F64] = "copysign"; - Names[RTLIB::COPYSIGN_F80] = "copysignl"; - Names[RTLIB::COPYSIGN_F128] = "copysignl"; - Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; - Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; - Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; - Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; - Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; - Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; - Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; - Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; - Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; - Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; - Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; - Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; - Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; - 
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; - Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; - Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; - Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; - Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; - Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; - Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; - Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; - Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; - Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; - Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; - Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; - Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; - Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; - Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; - Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; - Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; - Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; - Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; - Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; - Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; - Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; - Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; - Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; - Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; - Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; - Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; - Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; - Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; - Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; - Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; - Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; - Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; - Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; - Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; - Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; - Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; - Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; - Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; - Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; - Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; - Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; - Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; - Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; - Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; - Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; - Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; - Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; - Names[RTLIB::OEQ_F32] = "__eqsf2"; - Names[RTLIB::OEQ_F64] = "__eqdf2"; - Names[RTLIB::OEQ_F128] = "__eqtf2"; - Names[RTLIB::UNE_F32] = "__nesf2"; - Names[RTLIB::UNE_F64] = "__nedf2"; - Names[RTLIB::UNE_F128] = "__netf2"; - Names[RTLIB::OGE_F32] = "__gesf2"; 
- Names[RTLIB::OGE_F64] = "__gedf2"; - Names[RTLIB::OGE_F128] = "__getf2"; - Names[RTLIB::OLT_F32] = "__ltsf2"; - Names[RTLIB::OLT_F64] = "__ltdf2"; - Names[RTLIB::OLT_F128] = "__lttf2"; - Names[RTLIB::OLE_F32] = "__lesf2"; - Names[RTLIB::OLE_F64] = "__ledf2"; - Names[RTLIB::OLE_F128] = "__letf2"; - Names[RTLIB::OGT_F32] = "__gtsf2"; - Names[RTLIB::OGT_F64] = "__gtdf2"; - Names[RTLIB::OGT_F128] = "__gttf2"; - Names[RTLIB::UO_F32] = "__unordsf2"; - Names[RTLIB::UO_F64] = "__unorddf2"; - Names[RTLIB::UO_F128] = "__unordtf2"; - Names[RTLIB::O_F32] = "__unordsf2"; - Names[RTLIB::O_F64] = "__unorddf2"; - Names[RTLIB::O_F128] = "__unordtf2"; - Names[RTLIB::MEMCPY] = "memcpy"; - Names[RTLIB::MEMMOVE] = "memmove"; - Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; - Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; - Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; - Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; - Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; - Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; - Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; - Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; - Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; - Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; - Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; - Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; - Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; - Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; - Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; - Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; - Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; - Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; - Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; - Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; - Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; - Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; - Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; - Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; - Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; -} - -/// InitLibcallCallingConvs - Set default libcall CallingConvs. -/// -static void InitLibcallCallingConvs(CallingConv::ID *CCs) { - for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { - CCs[i] = CallingConv::C; - } -} - /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { @@ -571,447 +253,15 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } -/// InitCmpLibcallCCs - Set default comparison libcall CC. 
-/// -static void InitCmpLibcallCCs(ISD::CondCode *CCs) { - memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); - CCs[RTLIB::OEQ_F32] = ISD::SETEQ; - CCs[RTLIB::OEQ_F64] = ISD::SETEQ; - CCs[RTLIB::OEQ_F128] = ISD::SETEQ; - CCs[RTLIB::UNE_F32] = ISD::SETNE; - CCs[RTLIB::UNE_F64] = ISD::SETNE; - CCs[RTLIB::UNE_F128] = ISD::SETNE; - CCs[RTLIB::OGE_F32] = ISD::SETGE; - CCs[RTLIB::OGE_F64] = ISD::SETGE; - CCs[RTLIB::OGE_F128] = ISD::SETGE; - CCs[RTLIB::OLT_F32] = ISD::SETLT; - CCs[RTLIB::OLT_F64] = ISD::SETLT; - CCs[RTLIB::OLT_F128] = ISD::SETLT; - CCs[RTLIB::OLE_F32] = ISD::SETLE; - CCs[RTLIB::OLE_F64] = ISD::SETLE; - CCs[RTLIB::OLE_F128] = ISD::SETLE; - CCs[RTLIB::OGT_F32] = ISD::SETGT; - CCs[RTLIB::OGT_F64] = ISD::SETGT; - CCs[RTLIB::OGT_F128] = ISD::SETGT; - CCs[RTLIB::UO_F32] = ISD::SETNE; - CCs[RTLIB::UO_F64] = ISD::SETNE; - CCs[RTLIB::UO_F128] = ISD::SETNE; - CCs[RTLIB::O_F32] = ISD::SETEQ; - CCs[RTLIB::O_F64] = ISD::SETEQ; - CCs[RTLIB::O_F128] = ISD::SETEQ; -} - /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { - // All operations default to being supported. - memset(OpActions, 0, sizeof(OpActions)); - memset(LoadExtActions, 0, sizeof(LoadExtActions)); - memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); - memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); - memset(CondCodeActions, 0, sizeof(CondCodeActions)); - - // Set default actions for various operations. - for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { - // Default all indexed load / store to expand. - for (unsigned IM = (unsigned)ISD::PRE_INC; - IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { - setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); - setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); - } - - // These operations default to expand. - setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); - } - - // Most targets ignore the @llvm.prefetch intrinsic. - setOperationAction(ISD::PREFETCH, MVT::Other, Expand); - - // ConstantFP nodes default to expand. Targets can either change this to - // Legal, in which case all fp constants are legal, or use isFPImmLegal() - // to optimize expansions for certain constants. - setOperationAction(ISD::ConstantFP, MVT::f16, Expand); - setOperationAction(ISD::ConstantFP, MVT::f32, Expand); - setOperationAction(ISD::ConstantFP, MVT::f64, Expand); - setOperationAction(ISD::ConstantFP, MVT::f80, Expand); - setOperationAction(ISD::ConstantFP, MVT::f128, Expand); - - // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f128, Expand); - setOperationAction(ISD::FLOG2, MVT::f128, Expand); - setOperationAction(ISD::FLOG10, MVT::f128, Expand); - setOperationAction(ISD::FEXP , MVT::f128, Expand); - setOperationAction(ISD::FEXP2, MVT::f128, Expand); - setOperationAction(ISD::FFLOOR, MVT::f128, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); - setOperationAction(ISD::FCEIL, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - - // Default ISD::TRAP to expand (which turns it into abort). - setOperationAction(ISD::TRAP, MVT::Other, Expand); - - // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" - // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
- // - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); - - IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize - = maxStoresPerMemmoveOptSize = 4; - benefitFromCodePlacementOpt = false; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; - SelectIsExpensive = false; - IntDivIsCheap = false; - Pow2DivIsCheap = false; - JumpIsExpensive = false; - predictableSelectIsExpensive = false; - StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; - BooleanContents = UndefinedBooleanContent; - BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; - MinStackArgumentAlignment = 1; - ShouldFoldAtomicFences = false; - InsertFencesForAtomic = false; - SupportJumpTables = true; - MinimumJumpTableEntries = 4; - - InitLibcallNames(LibcallRoutineNames); - InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); -} - -TargetLowering::~TargetLowering() { - delete &TLOF; -} - -MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize(0)); -} - -/// canOpTrap - Returns true if the operation can trap for the value type. -/// VT must be a legal type. -bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { - assert(isTypeLegal(VT)); - switch (Op) { - default: - return false; - case ISD::FDIV: - case ISD::FREM: - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: - return true; - } -} - - -static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - MVT &RegisterVT, - TargetLowering *TLI) { - // Figure out the right, legal destination reg to copy into. - unsigned NumElts = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType(); - - unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; - } - - // Divide the input until we get to a supported size. This will always - // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { - NumElts >>= 1; - NumVectorRegs <<= 1; - } - - NumIntermediates = NumVectorRegs; - - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); - if (!TLI->isTypeLegal(NewVT)) - NewVT = EltTy; - IntermediateVT = NewVT; - - unsigned NewVTSize = NewVT.getSizeInBits(); - - // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize)) - NewVTSize = NextPowerOf2(NewVTSize); - - MVT DestVT = TLI->getRegisterType(NewVT); - RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); - - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; -} - -/// isLegalRC - Return true if the value types that can be represented by the -/// specified register class are all legal. 
-bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
-  for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
-       I != E; ++I) {
-    if (isTypeLegal(*I))
-      return true;
-  }
-  return false;
-}
-
-/// findRepresentativeClass - Return the largest legal super-reg register class
-/// of the register class for the specified type and its associated "cost".
-std::pair<const TargetRegisterClass*, uint8_t>
-TargetLowering::findRepresentativeClass(MVT VT) const {
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
-  if (!RC)
-    return std::make_pair(RC, 0);
-
-  // Compute the set of all super-register classes.
-  BitVector SuperRegRC(TRI->getNumRegClasses());
-  for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
-    SuperRegRC.setBitsInMask(RCI.getMask());
-
-  // Find the first legal register class with the largest spill size.
-  const TargetRegisterClass *BestRC = RC;
-  for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
-    const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
-    // We want the largest possible spill size.
-    if (SuperRC->getSize() <= BestRC->getSize())
-      continue;
-    if (!isLegalRC(SuperRC))
-      continue;
-    BestRC = SuperRC;
-  }
-  return std::make_pair(BestRC, 1);
-}
-
-/// computeRegisterProperties - Once all of the register classes are added,
-/// this allows us to compute derived properties we expose.
-void TargetLowering::computeRegisterProperties() {
-  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
-         "Too many value types for ValueTypeActions to hold!");
-
-  // Everything defaults to needing one register.
-  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
-    NumRegistersForVT[i] = 1;
-    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
-  }
-  // ...except isVoid, which doesn't need any registers.
-  NumRegistersForVT[MVT::isVoid] = 0;
-
-  // Find the largest integer register class.
-  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
-  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
-    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
-
-  // Every integer value type larger than this largest register takes twice as
-  // many registers to represent as the previous ValueType.
-  for (unsigned ExpandedReg = LargestIntReg + 1;
-       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
-    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
-    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
-    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
-    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
-                                   TypeExpandInteger);
-  }
-
-  // Inspect all of the ValueType's smaller than the largest integer
-  // register to see which ones need promotion.
-  unsigned LegalIntReg = LargestIntReg;
-  for (unsigned IntReg = LargestIntReg - 1;
-       IntReg >= (unsigned)MVT::i1; --IntReg) {
-    MVT IVT = (MVT::SimpleValueType)IntReg;
-    if (isTypeLegal(IVT)) {
-      LegalIntReg = IntReg;
-    } else {
-      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
-        (const MVT::SimpleValueType)LegalIntReg;
-      ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
-    }
-  }
-
-  // ppcf128 type is really two f64's.
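The expansion performed in the next hunk relies on the IBM double-double layout behind ppcf128: one value travels as a (hi, lo) pair of ordinary f64, so TypeExpandFloat can simply hand it two f64 registers. A standalone sketch of the pair representation, using Knuth's error-free TwoSum purely to illustrate how the low half carries bits the high half cannot:

#include <cstdio>

struct PPCF128 { double Hi, Lo; };  // value == Hi + Lo, with |Lo| tiny vs. Hi

// Error-free addition: returns a hi/lo pair whose exact sum is A + B.
PPCF128 twoSum(double A, double B) {
  double S = A + B;
  double T = S - A;
  double Err = (A - (S - T)) + (B - T);
  return {S, Err};
}

int main() {
  PPCF128 V = twoSum(1.0, 1e-30);  // 1e-30 survives in the low half
  std::printf("hi=%g lo=%g\n", V.Hi, V.Lo);
  return 0;
}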
- if (!isTypeLegal(MVT::ppcf128)) { - NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::ppcf128] = MVT::f64; - TransformToType[MVT::ppcf128] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); - } - - // Decide how to handle f64. If the target does not have native f64 support, - // expand it to i64 and we will be generating soft float library calls. - if (!isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; - RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; - TransformToType[MVT::f64] = MVT::i64; - ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); - } - - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. - if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } - } - - // Loop over all of the vector value types to see which need transformations. - for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; - - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. - MVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - // Promote vectors of integers to vectors with the same number - // of elements, with a wider element type. - if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, TypePromoteInteger); - IsLegalWiderType = true; - break; - } - } - - if (IsLegalWiderType) continue; - - // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; - break; - } - } - if (IsLegalWiderType) continue; - } - - MVT IntermediateVT; - MVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - MVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. 
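The branch below applies that rule. Restated outside LLVM with the same three actions (the helper and its power-of-two test are a made-up stand-in, not the in-tree API):

#include <cstdio>

enum class Action { SplitVector, ScalarizeVector, WidenVector };

bool isPow2(unsigned N) { return N != 0 && (N & (N - 1)) == 0; }

Action defaultVectorAction(unsigned NumElts) {
  if (!isPow2(NumElts))
    return Action::WidenVector;             // e.g. <3 x float> -> <4 x float>
  return NumElts > 1 ? Action::SplitVector  // e.g. <8 x i32> -> 2 x <4 x i32>
                     : Action::ScalarizeVector;  // <1 x i32> -> i32
}

int main() {
  std::printf("%d %d %d\n", (int)defaultVectorAction(3),
              (int)defaultVectorAction(8), (int)defaultVectorAction(1));
  return 0;
}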
- TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); - } - } - - // Determine the 'representative' register class for each value type. - // An representative register class is the largest (meaning one which is - // not a sub-register class / subreg register class) legal register class for - // a group of value types. For example, on i386, i8, i16, and i32 - // representative would be GR32; while on x86_64 it's GR64. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { - const TargetRegisterClass* RRC; - uint8_t Cost; - tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); - RepRegClassForVT[i] = RRC; - RepRegClassCostForVT[i] = Cost; - } -} + : TargetLoweringBase(tm, tlof) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } -EVT TargetLowering::getSetCCResultType(EVT VT) const { - assert(!VT.isVector() && "No default SetCC type for vectors!"); - return getPointerTy(0).SimpleTy; -} - -MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { - return MVT::i32; // return the default value -} - /// Check whether a given call node is in tail position within its function. If /// so, it sets Chain to the input chain of the tail call. bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, @@ -1167,80 +417,6 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } -/// getVectorTypeBreakdown - Vector types are broken down into some number of -/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 -/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. -/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. -/// -/// This method returns the number of registers needed, and the VT for each -/// register. It also returns the VT and quantity of the intermediate values -/// before they are promoted/expanded. -/// -unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, - EVT &IntermediateVT, - unsigned &NumIntermediates, - MVT &RegisterVT) const { - unsigned NumElts = VT.getVectorNumElements(); - - // If there is a wider vector type with the same element type as this one, - // or a promoted vector type that has the same number of elements which - // are wider, then we should convert to that legal vector type. - // This handles things like <2 x float> -> <4 x float> and - // <4 x i1> -> <4 x i32>. - LegalizeTypeAction TA = getTypeAction(Context, VT); - if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { - EVT RegisterEVT = getTypeToTransformTo(Context, VT); - if (isTypeLegal(RegisterEVT)) { - IntermediateVT = RegisterEVT; - RegisterVT = RegisterEVT.getSimpleVT(); - NumIntermediates = 1; - return 1; - } - } - - // Figure out the right, legal destination reg to copy into. - EVT EltTy = VT.getVectorElementType(); - - unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; - } - - // Divide the input until we get to a supported size. This will always - // end with a scalar if the target doesn't support vectors. 
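The halving loop this comment introduces appears in the next hunk. A compact standalone model of it: halve the element count until the piece becomes legal and count the pieces. The legality predicate below is a hypothetical 128-bit-register target, not LLVM's query:

#include <cstdio>

bool isLegalVec(unsigned EltBits, unsigned NumElts) {
  return NumElts * EltBits <= 128 && NumElts <= 4;  // made-up target rule
}

unsigned breakDown(unsigned EltBits, unsigned NumElts, unsigned &PieceElts) {
  unsigned NumVectorRegs = 1;
  while (NumElts > 1 && !isLegalVec(EltBits, NumElts)) {
    NumElts >>= 1;        // split the vector in half...
    NumVectorRegs <<= 1;  // ...doubling the number of pieces
  }
  PieceElts = NumElts;    // may reach 1: the vector decayed to a scalar
  return NumVectorRegs;
}

int main() {
  unsigned PieceElts;
  unsigned N = breakDown(32, 8, PieceElts);  // v8f32 -> 2 x v4f32
  std::printf("%u pieces of %u elements\n", N, PieceElts);
  return 0;
}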
-  while (NumElts > 1 && !isTypeLegal(
-                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
-    NumElts >>= 1;
-    NumVectorRegs <<= 1;
-  }
-
-  NumIntermediates = NumVectorRegs;
-
-  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
-  if (!isTypeLegal(NewVT))
-    NewVT = EltTy;
-  IntermediateVT = NewVT;
-
-  MVT DestVT = getRegisterType(Context, NewVT);
-  RegisterVT = DestVT;
-  unsigned NewVTSize = NewVT.getSizeInBits();
-
-  // Convert sizes such as i33 to i64.
-  if (!isPowerOf2_32(NewVTSize))
-    NewVTSize = NextPowerOf2(NewVTSize);
-
-  if (EVT(DestVT).bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
-    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
-
-  // Otherwise, promotion or legal types use the same number of registers as
-  // the vector decimated to the appropriate level.
-  return NumVectorRegs;
-}
-
 /// Get the EVTs and ArgFlags collections that represent the legalized return
 /// type of the given function.  This does not require a DAG or a return value,
 /// and is suitable for use before any DAGs for the function are constructed.
@@ -1291,13 +467,6 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
   }
 }
 
-/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
-/// function arguments in the caller parameter area.  This is the actual
-/// alignment, not its logarithm.
-unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const {
-  return TD->getCallFrameTypeAlignment(Ty);
-}
-
 /// getJumpTableEncoding - Return the entry encoding for a jump table in the
 /// current function.  The returned value is a member of the
 /// MachineJumpTableInfo::JTEntryKind enum.
@@ -1354,103 +523,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 }
 
 //===----------------------------------------------------------------------===//
-// TargetTransformInfo Helpers
-//===----------------------------------------------------------------------===//
-
-int TargetLowering::InstructionOpcodeToISD(unsigned Opcode) const {
-  enum InstructionOpcodes {
-#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
-#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
-#include "llvm/IR/Instruction.def"
-  };
-  switch (static_cast<InstructionOpcodes>(Opcode)) {
-  case Ret: return 0;
-  case Br: return 0;
-  case Switch: return 0;
-  case IndirectBr: return 0;
-  case Invoke: return 0;
-  case Resume: return 0;
-  case Unreachable: return 0;
-  case Add: return ISD::ADD;
-  case FAdd: return ISD::FADD;
-  case Sub: return ISD::SUB;
-  case FSub: return ISD::FSUB;
-  case Mul: return ISD::MUL;
-  case FMul: return ISD::FMUL;
-  case UDiv: return ISD::UDIV;
-  case SDiv: return ISD::UDIV;
-  case FDiv: return ISD::FDIV;
-  case URem: return ISD::UREM;
-  case SRem: return ISD::SREM;
-  case FRem: return ISD::FREM;
-  case Shl: return ISD::SHL;
-  case LShr: return ISD::SRL;
-  case AShr: return ISD::SRA;
-  case And: return ISD::AND;
-  case Or: return ISD::OR;
-  case Xor: return ISD::XOR;
-  case Alloca: return 0;
-  case Load: return ISD::LOAD;
-  case Store: return ISD::STORE;
-  case GetElementPtr: return 0;
-  case Fence: return 0;
-  case AtomicCmpXchg: return 0;
-  case AtomicRMW: return 0;
-  case Trunc: return ISD::TRUNCATE;
-  case ZExt: return ISD::ZERO_EXTEND;
-  case SExt: return ISD::SIGN_EXTEND;
-  case FPToUI: return ISD::FP_TO_UINT;
-  case FPToSI: return ISD::FP_TO_SINT;
-  case UIToFP: return ISD::UINT_TO_FP;
-  case SIToFP: return ISD::SINT_TO_FP;
-  case FPTrunc: return ISD::FP_ROUND;
-  case FPExt: return ISD::FP_EXTEND;
-  case PtrToInt: return ISD::BITCAST;
-  case IntToPtr: return ISD::BITCAST;
-  case BitCast: return ISD::BITCAST;
-  case ICmp: return ISD::SETCC;
-  case FCmp: return ISD::SETCC;
-  case PHI: return 0;
-  case Call: return 0;
-  case Select: return ISD::SELECT;
-  case UserOp1: return 0;
-  case UserOp2: return 0;
-  case VAArg: return 0;
-  case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
-  case InsertElement: return ISD::INSERT_VECTOR_ELT;
-  case ShuffleVector: return ISD::VECTOR_SHUFFLE;
-  case ExtractValue: return ISD::MERGE_VALUES;
-  case InsertValue: return ISD::MERGE_VALUES;
-  case LandingPad: return 0;
-  }
-
-  llvm_unreachable("Unknown instruction type encountered!");
-}
-
-std::pair<unsigned, MVT>
-TargetLowering::getTypeLegalizationCost(Type *Ty) const {
-  LLVMContext &C = Ty->getContext();
-  EVT MTy = getValueType(Ty);
-
-  unsigned Cost = 1;
-  // We keep legalizing the type until we find a legal kind. We assume that
-  // the only operation that costs anything is the split. After splitting
-  // we need to handle two types.
-  while (true) {
-    LegalizeKind LK = getTypeConversion(C, MTy);
-
-    if (LK.first == TypeLegal)
-      return std::make_pair(Cost, MTy.getSimpleVT());
-
-    if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
-      Cost *= 2;
-
-    // Keep legalizing the type.
-    MTy = LK.second;
-  }
-}
-
-//===----------------------------------------------------------------------===//
 // Optimization Methods
 //===----------------------------------------------------------------------===//
 
@@ -2394,7 +1466,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
           for (unsigned offset=0; offset<origWidth/width; offset++) {
             if ((newMask & Mask) == Mask) {
-              if (!TD->isLittleEndian())
+              if (!getDataLayout()->isLittleEndian())
                 bestOffset = (origWidth/width - offset - 1) * (width/8);
               else
                 bestOffset = (uint64_t)offset * (width/8);
@@ -3199,7 +2271,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
       std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
 
   // Figure out which register class contains this reg.
-  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
   for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
        E = RI->regclass_end(); RCI != E; ++RCI) {
     const TargetRegisterClass *RC = *RCI;
@@ -3323,7 +2395,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
       // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
-        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+        unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
@@ -3338,7 +2410,7 @@
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        OpInfo.ConstraintVT = MVT::getIntegerVT(
-          8*TD->getPointerSize(PT->getAddressSpace()));
+          8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
@@ -3633,44 +2705,6 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
   }
 }
 
-//===----------------------------------------------------------------------===//
-// Loop Strength Reduction hooks
-//===----------------------------------------------------------------------===//
-
-/// isLegalAddressingMode - Return true if the addressing mode represented
-/// by AM is legal for this target, for a load/store of the specified type.
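The default implementation, whose body follows in the next hunk, accepts only a conservative RISC-like r+r or r+i form with a signed 16-bit offset. A self-contained restatement; field types are simplified and LLVM's GlobalValue pointer becomes a flag:

#include <cstdint>
#include <cstdio>

struct AddrMode {          // simplified mirror of the fields consulted
  int64_t BaseOffs;        // constant offset
  bool HasBaseReg;         // is a base register present?
  int64_t Scale;           // multiplier on the index register
  bool HasBaseGV;          // stand-in for the GlobalValue *BaseGV pointer
};

bool isLegalRISCAddrMode(const AddrMode &AM) {
  // Allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;
  // No global is ever allowed as a base.
  if (AM.HasBaseGV)
    return false;
  switch (AM.Scale) {
  case 0:                                // "r+i" or just "i"
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)    // "r+r+i" is not allowed
      return false;
    break;                               // otherwise r+r or r+i
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)    // 2*r+r and 2*r+i are not allowed
      return false;
    break;                               // allow 2*r as r+r
  }
  return true; // note: scales above 2 also land here, as in the original
}

int main() {
  std::printf("%d\n", isLegalRISCAddrMode({40, true, 1, false})); // r+r+i: 0
  return 0;
}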
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // The default implementation of this implements a conservative RISCy, r+r and - // r+i addr mode. - - // Allows a sign-extended 16-bit immediate field. - if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) - return false; - - // No global is ever allowed as a base. - if (AM.BaseGV) - return false; - - // Only support r+r, - switch (AM.Scale) { - case 0: // "r+i" or just "i", depending on HasBaseReg. - break; - case 1: - if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. - return false; - // Otherwise we have r+r or r+i. - break; - case 2: - if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. - return false; - // Allow 2*r as r+r. - break; - } - - return true; -} - /// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 09e923c..b58bb85 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { - const TargetLowering *TLI; + const TargetLoweringBase *TLI; Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; @@ -58,7 +58,7 @@ namespace { AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -82,7 +82,7 @@ namespace { char SjLjEHPrepare::ID = 0; // Public Interface To the SjLjEHPrepare pass. -FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { +FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) { return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 665388b..e242804 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -36,7 +36,7 @@ namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. 
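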
-    const TargetLowering *TLI;
+    const TargetLoweringBase *TLI;
 
     Function *F;
     Module *M;
 
@@ -68,7 +68,7 @@ namespace {
     StackProtector() : FunctionPass(ID), TLI(0) {
       initializeStackProtectorPass(*PassRegistry::getPassRegistry());
     }
-    StackProtector(const TargetLowering *tli)
+    StackProtector(const TargetLoweringBase *tli)
       : FunctionPass(ID), TLI(tli) {
       initializeStackProtectorPass(*PassRegistry::getPassRegistry());
     }
@@ -85,7 +85,7 @@ char StackProtector::ID = 0;
 INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors",
                 false, false)
 
-FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) {
   return new StackProtector(tli);
 }
 
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 0000000..6284d52
--- /dev/null
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1274 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; + Names[RTLIB::SDIV_I8] = "__divqi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. + Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_F128] = "__addtf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_F128] = "__subtf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_F128] = "__multf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_F128] = "__divtf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_F128] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_F128] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_F128] = "__powitf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + 
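The rest of the table continues below; little of it needs memorizing, because the naming is mechanical. libm entry points take a type suffix ("f" for f32, none for f64, "l" for f80/f128/ppcf128), and libgcc integer helpers encode the width as hi/si/di/ti with a trailing digit counting operands, result included (__negdi2 has two, __ashldi3 has three). A hypothetical generator illustrating both conventions:

#include <cstdio>
#include <string>

std::string libmName(const char *Base, int Bits) {
  switch (Bits) {
  case 32: return std::string(Base) + "f";  // sqrtf
  case 64: return Base;                     // sqrt
  default: return std::string(Base) + "l";  // sqrtl for f80 and f128
  }
}

const char *gccWidthSuffix(int Bits) {
  switch (Bits) {
  case 16: return "hi";  // half integer
  case 32: return "si";  // single integer
  case 64: return "di";  // double integer
  default: return "ti";  // tetra integer, 128 bit
  }
}

int main() {
  // Reconstructs two entries from the table: "sqrtf" and "__ashldi3".
  std::printf("%s __ashl%s3\n", libmName("sqrt", 32).c_str(),
              gccWidthSuffix(64));
  return 0;
}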
Names[RTLIB::SQRT_F128] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_F128] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_F128] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_F128] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_F128] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_F128] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_F128] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_F128] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_F128] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_F128] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_F128] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_F128] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_F128] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_F128] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; + Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + 
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::OEQ_F128] = "__eqtf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::UNE_F128] = "__netf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; 
+ Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OGE_F128] = "__getf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLT_F128] = "__lttf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OLE_F128] = "__letf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::OGT_F128] = "__gttf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::UO_F128] = "__unordtf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::O_F128] = "__unordtf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; +} + +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + if (RetVT == MVT::f128) + return FPEXT_F32_F128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::f128) + return FPEXT_F64_F128; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::f128) + return FPROUND_F128_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::f128) + return FPROUND_F128_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOSINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
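getFPTOUINT, defined next, repeats the same shape once more. Viewed abstractly, each of these helpers is a lookup keyed on the (operand type, result type) pair; the sketch below shows a table-driven formulation as a hypothetical alternative, not the in-tree code, which plausibly sticks to plain branches in part because LLVM discourages static initializers:

#include <cstdio>
#include <map>
#include <utility>

enum VT { F32, F64, I32, I64 };
enum Libcall { FPTOSINT_F32_I32, FPTOSINT_F32_I64,
               FPTOSINT_F64_I32, FPTOSINT_F64_I64, UNKNOWN_LIBCALL };

Libcall getFPToSInt(VT OpVT, VT RetVT) {
  static const std::map<std::pair<VT, VT>, Libcall> Table = {
      {{F32, I32}, FPTOSINT_F32_I32}, {{F32, I64}, FPTOSINT_F32_I64},
      {{F64, I32}, FPTOSINT_F64_I32}, {{F64, I64}, FPTOSINT_F64_I64},
  };
  auto It = Table.find({OpVT, RetVT});
  return It == Table.end() ? UNKNOWN_LIBCALL : It->second;
}

int main() {
  std::printf("%d\n", getFPToSInt(F64, I32));  // FPTOSINT_F64_I32
  return 0;
}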
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOUINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. 
+/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::UNE_F128] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OGE_F128] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLT_F128] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OLE_F128] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::OGT_F128] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::UO_F128] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; + CCs[RTLIB::O_F128] = ISD::SETEQ; +} + +/// NOTE: The constructor takes ownership of TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + + // These library functions default to expand. 
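To see what the CC table initialized just above encodes: when a float comparison is soft-floated, the legalizer emits a call to the matching comparison routine and then compares the returned integer against zero using the listed condition. A standalone illustration; the helper is named after the real libgcc/compiler-rt routine __ltsf2, but its body here is only a stand-in:

#include <cstdio>

// Stand-in for __ltsf2: negative iff A < B, zero iff equal, positive
// otherwise (including the unordered NaN case).
int my_ltsf2(float A, float B) { return A < B ? -1 : (A == B ? 0 : 1); }

bool lowerFloatLess(float A, float B) {
  int R = my_ltsf2(A, B);  // the libcall
  return R < 0;            // CCs[RTLIB::OLT_F32] == ISD::SETLT: test R < 0
}

int main() {
  std::printf("%d %d\n", lowerFloatLess(1.0f, 2.0f),
              lowerFloatLess(2.0f, 1.0f));
  return 0;
}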
+ setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f128, Expand); + setOperationAction(ISD::FLOG2, MVT::f128, Expand); + setOperationAction(ISD::FLOG10, MVT::f128, Expand); + setOperationAction(ISD::FEXP , MVT::f128, Expand); + setOperationAction(ISD::FEXP2, MVT::f128, Expand); + setOperationAction(ISD::FFLOOR, MVT::f128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); + setOperationAction(ISD::FCEIL, MVT::f128, Expand); + setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
+ // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize + = maxStoresPerMemmoveOptSize = 4; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + predictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize(0)); +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT, + TargetLoweringBase *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + MVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. 
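Before the final return below: when the chosen piece type is wider than the largest register (DestVT narrower than NewVT), every piece costs NewVTSize/DestVTSize registers, so the piece count is scaled up accordingly. The arithmetic in isolation, with hypothetical numbers:

#include <cstdio>

unsigned regsPerPiece(unsigned PieceBits, unsigned RegBits) {
  return PieceBits > RegBits ? PieceBits / RegBits : 1;  // expansion factor
}

int main() {
  // Four i64 pieces on a target whose widest register is i32: 8 registers.
  unsigned NumVectorRegs = 4;
  std::printf("%u\n", NumVectorRegs * regsPerPiece(64, 32));
  return 0;
}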
+  return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+  for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+       I != E; ++I) {
+    if (isTypeLegal(*I))
+      return true;
+  }
+  return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLoweringBase::findRepresentativeClass(MVT VT) const {
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+  if (!RC)
+    return std::make_pair(RC, 0);
+
+  // Compute the set of all super-register classes.
+  BitVector SuperRegRC(TRI->getNumRegClasses());
+  for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+    SuperRegRC.setBitsInMask(RCI.getMask());
+
+  // Find the first legal register class with the largest spill size.
+  const TargetRegisterClass *BestRC = RC;
+  for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+    const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+    // We want the largest possible spill size.
+    if (SuperRC->getSize() <= BestRC->getSize())
+      continue;
+    if (!isLegalRC(SuperRC))
+      continue;
+    BestRC = SuperRC;
+  }
+  return std::make_pair(BestRC, 1);
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties() {
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+         "Too many value types for ValueTypeActions to hold!");
+
+  // Everything defaults to needing one register.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    NumRegistersForVT[i] = 1;
+    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+  }
+  // ...except isVoid, which doesn't need any registers.
+  NumRegistersForVT[MVT::isVoid] = 0;
+
+  // Find the largest integer register class.
+  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+  // Every integer value type larger than this largest register takes twice as
+  // many registers to represent as the previous ValueType.
+  for (unsigned ExpandedReg = LargestIntReg + 1;
+       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+                                   TypeExpandInteger);
+  }
+
+  // Inspect all of the ValueType's smaller than the largest integer
+  // register to see which ones need promotion.
+  unsigned LegalIntReg = LargestIntReg;
+  for (unsigned IntReg = LargestIntReg - 1;
+       IntReg >= (unsigned)MVT::i1; --IntReg) {
+    MVT IVT = (MVT::SimpleValueType)IntReg;
+    if (isTypeLegal(IVT)) {
+      LegalIntReg = IntReg;
+    } else {
+      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+        (const MVT::SimpleValueType)LegalIntReg;
+      ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+    }
+  }
+
+  // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (isTypeLegal(VT)) continue; + + // Determine if there is a legal wider type. If so, we should promote to + // that wider vector type. + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { + bool IsLegalWiderType = false; + // First try to promote the elements of integer vectors. If no legal + // promotion was found, fallback to the widen-vector method. + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. + if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && + isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + + if (IsLegalWiderType) continue; + + // Try to widen the vector. + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + if (SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && + isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) continue; + } + + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. 
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // A representative register class is the largest (meaning one which is
+ // not a sub-register class) legal register class for a group of value
+ // types. For example, on i386 the representative for i8, i16, and i32
+ // would be GR32; on x86_64 it is GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, + SmallVectorImpl &Outs, + const TargetLowering &TLI) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + Flags.setSExt(); + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); + } +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. 
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // The default implementation of this implements a conservative RISCy, r+r and + // r+i addr mode. + + // Allows a sign-extended 16-bit immediate field. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Only support r+r, + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + } + + return true; +} -- cgit v1.1 From cddd8a613e377e3064a2e7ca1d7ac8cb25f9b849 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 11 Jan 2013 20:07:53 +0000 Subject: Added debug messages to GlobalOpt. Specifically: 1. Added a missing new line when we emit a debug message saying that we are marking a global variable as constant. 2. Added debug messages that describe what is occuring when GlobalOpt is evaluating a block/function. 3. Added a debug message that says what specific constructor is being evaluated. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172247 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 127 +++++++++++++++++++++++++++++++++------ 1 file changed, 110 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ff2964f..1562d72 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1990,7 +1990,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return Changed; } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); GV->setConstant(true); // Clean up any obviously simplifiable users now. @@ -2585,24 +2585,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, while (1) { Constant *InstResult = 0; + DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + if (StoreInst *SI = dyn_cast(CurInst)) { - if (!SI->isSimple()) return false; // no volatile/atomic accesses. + if (!SI->isSimple()) { + DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } Constant *Ptr = getVal(SI->getOperand(1)); - if (ConstantExpr *CE = dyn_cast(Ptr)) + if (ConstantExpr *CE = dyn_cast(Ptr)) { + DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); Ptr = ConstantFoldConstantExpression(CE, TD, TLI); - if (!isSimpleEnoughPointerToCommit(Ptr)) + DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + } + if (!isSimpleEnoughPointerToCommit(Ptr)) { // If this is too complex for us to commit, reject it. + DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); return false; + } Constant *Val = getVal(SI->getOperand(0)); // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. - if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) { + DEBUG(dbgs() << "Store value is too complex to evaluate store. 
" << *Val + << "\n"); return false; + } - if (ConstantExpr *CE = dyn_cast(Ptr)) + if (ConstantExpr *CE = dyn_cast(Ptr)) { if (CE->getOpcode() == Instruction::BitCast) { + DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the // stored value. @@ -2631,6 +2645,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If we can't improve the situation by introspecting NewTy, // we have to give up. } else { + DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); return false; } } @@ -2638,25 +2654,36 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If we found compatible types, go ahead and push the bitcast // onto the stored value. Val = ConstantExpr::getBitCast(Val, NewTy); + + DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); } + } MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), getVal(BO->getOperand(1))); + DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult + << "\n"); } else if (CmpInst *CI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getCompare(CI->getPredicate(), getVal(CI->getOperand(0)), getVal(CI->getOperand(1))); + DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult + << "\n"); } else if (CastInst *CI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getCast(CI->getOpcode(), getVal(CI->getOperand(0)), CI->getType()); + DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult + << "\n"); } else if (SelectInst *SI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), getVal(SI->getOperand(1)), getVal(SI->getOperand(2))); + DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult + << "\n"); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector GEPOps; @@ -2666,41 +2693,70 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, InstResult = ConstantExpr::getGetElementPtr(P, GEPOps, cast(GEP)->isInBounds()); + DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult + << "\n"); } else if (LoadInst *LI = dyn_cast(CurInst)) { - if (!LI->isSimple()) return false; // no volatile/atomic accesses. + + if (!LI->isSimple()) { + DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + Constant *Ptr = getVal(LI->getOperand(0)); - if (ConstantExpr *CE = dyn_cast(Ptr)) + if (ConstantExpr *CE = dyn_cast(Ptr)) { Ptr = ConstantFoldConstantExpression(CE, TD, TLI); + DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " << *Ptr << "\n"); + } InstResult = ComputeLoadResult(Ptr); - if (InstResult == 0) return false; // Could not evaluate load. + if (InstResult == 0) { + DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. + } + + DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); } else if (AllocaInst *AI = dyn_cast(CurInst)) { - if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. + if (AI->isArrayAllocation()) { + DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. 
+ } Type *Ty = AI->getType()->getElementType(); AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); InstResult = AllocaTmps.back(); + DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa(CurInst) || isa(CurInst)) { CallSite CS(CurInst); // Debug info can safely be ignored here. if (isa(CS.getInstruction())) { + DEBUG(dbgs() << "Ignoring debug info.\n"); ++CurInst; continue; } // Cannot handle inline asm. - if (isa(CS.getCalledValue())) return false; + if (isa(CS.getCalledValue())) { + DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; + } if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { if (MemSetInst *MSI = dyn_cast(II)) { - if (MSI->isVolatile()) return false; + if (MSI->isVolatile()) { + DEBUG(dbgs() << "Can not optimize a volatile memset " << + "intrinsic.\n"); + return false; + } Constant *Ptr = getVal(MSI->getDest()); Constant *Val = getVal(MSI->getValue()); Constant *DestVal = ComputeLoadResult(getVal(Ptr)); if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { // This memset is a no-op. + DEBUG(dbgs() << "Ignoring no-op memset.\n"); ++CurInst; continue; } @@ -2708,6 +2764,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end) { + DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); ++CurInst; continue; } @@ -2715,8 +2772,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (II->getIntrinsicID() == Intrinsic::invariant_start) { // We don't insert an entry into Values, as it doesn't have a // meaningful return value. - if (!II->use_empty()) + if (!II->use_empty()) { + DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n"); return false; + } ConstantInt *Size = cast(II->getArgOperand(0)); Value *PtrArg = getVal(II->getArgOperand(1)); Value *Ptr = PtrArg->stripPointerCasts(); @@ -2724,20 +2783,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Type *ElemTy = cast(GV->getType())->getElementType(); if (!Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= - TD->getTypeStoreSize(ElemTy)) + TD->getTypeStoreSize(ElemTy)) { Invariants.insert(GV); + DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV + << "\n"); + } else { + DEBUG(dbgs() << "Found a global var, but can not treat it as an " + "invariant.\n"); + } } // Continue even if we do nothing. ++CurInst; continue; } + + DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); return false; } // Resolve function pointers. Function *Callee = dyn_cast(getVal(CS.getCalledValue())); - if (!Callee || Callee->mayBeOverridden()) + if (!Callee || Callee->mayBeOverridden()) { + DEBUG(dbgs() << "Can not resolve function pointer.\n"); return false; // Cannot resolve. + } SmallVector Formals; for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) @@ -2747,22 +2816,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; + DEBUG(dbgs() << "Constant folded function call. 
Result: " << + *InstResult << "\n"); } else { + DEBUG(dbgs() << "Can not constant fold function call.\n"); return false; } } else { - if (Callee->getFunctionType()->isVarArg()) + if (Callee->getFunctionType()->isVarArg()) { + DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); return false; + } Constant *RetVal; // Execute the call, if successful, use the return value. ValueStack.push_back(new DenseMap); - if (!EvaluateFunction(Callee, RetVal, Formals)) + if (!EvaluateFunction(Callee, RetVal, Formals)) { + DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; + } delete ValueStack.pop_back_val(); InstResult = RetVal; + + if (InstResult != NULL) { + DEBUG(dbgs() << "Successfully evaluated function. Result: " << + InstResult << "\n\n"); + } else { + DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + } } } else if (isa(CurInst)) { + DEBUG(dbgs() << "Found a terminator instruction.\n"); + if (BranchInst *BI = dyn_cast(CurInst)) { if (BI->isUnconditional()) { NextBB = BI->getSuccessor(0); @@ -2788,13 +2873,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, NextBB = 0; } else { // invoke, unwind, resume, unreachable. + DEBUG(dbgs() << "Can not handle terminator."); return false; // Cannot handle this terminator. } // We succeeded at evaluating this block! + DEBUG(dbgs() << "Successfully evaluated block.\n"); return true; } else { // Did not know how to evaluate this! + DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." + "\n"); return false; } @@ -2808,6 +2897,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If we just processed an invoke, we finished evaluating the block. if (InvokeInst *II = dyn_cast(CurInst)) { NextBB = II->getNormalDest(); + DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); return true; } @@ -2846,6 +2936,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, while (1) { BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings. + DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); + if (!EvaluateBlock(CurInst, NextBB)) return false; @@ -2925,6 +3017,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { } break; } + DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); // We cannot simplify external ctor functions. if (F->empty()) continue; -- cgit v1.1 From 8ecd3be1f3687222bfed627219844557024fcec1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 11 Jan 2013 20:11:33 +0000 Subject: Remove some accidentaly duplicated code. This needs urgent cleanup :( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172248 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 270 ---------------------------- 1 file changed, 270 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2e248e9..76ece7f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -33,226 +33,6 @@ #include using namespace llvm; -/// getFPEXT - Return the FPEXT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. 
-RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::f64) - return FPEXT_F32_F64; - if (RetVT == MVT::f128) - return FPEXT_F32_F128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::f128) - return FPEXT_F64_F128; - } - - return UNKNOWN_LIBCALL; -} - -/// getFPROUND - Return the FPROUND_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { - if (RetVT == MVT::f32) { - if (OpVT == MVT::f64) - return FPROUND_F64_F32; - if (OpVT == MVT::f80) - return FPROUND_F80_F32; - if (OpVT == MVT::f128) - return FPROUND_F128_F32; - if (OpVT == MVT::ppcf128) - return FPROUND_PPCF128_F32; - } else if (RetVT == MVT::f64) { - if (OpVT == MVT::f80) - return FPROUND_F80_F64; - if (OpVT == MVT::f128) - return FPROUND_F128_F64; - if (OpVT == MVT::ppcf128) - return FPROUND_PPCF128_F64; - } - - return UNKNOWN_LIBCALL; -} - -/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOSINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F32_I16; - if (RetVT == MVT::i32) - return FPTOSINT_F32_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F32_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F32_I128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOSINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F64_I16; - if (RetVT == MVT::i32) - return FPTOSINT_F64_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F64_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F64_I128; - } else if (OpVT == MVT::f80) { - if (RetVT == MVT::i32) - return FPTOSINT_F80_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F80_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F80_I128; - } else if (OpVT == MVT::f128) { - if (RetVT == MVT::i32) - return FPTOSINT_F128_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F128_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F128_I128; - } else if (OpVT == MVT::ppcf128) { - if (RetVT == MVT::i32) - return FPTOSINT_PPCF128_I32; - if (RetVT == MVT::i64) - return FPTOSINT_PPCF128_I64; - if (RetVT == MVT::i128) - return FPTOSINT_PPCF128_I128; - } - return UNKNOWN_LIBCALL; -} - -/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. 
-RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOUINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F32_I16; - if (RetVT == MVT::i32) - return FPTOUINT_F32_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F32_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F32_I128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOUINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F64_I16; - if (RetVT == MVT::i32) - return FPTOUINT_F64_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F64_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F64_I128; - } else if (OpVT == MVT::f80) { - if (RetVT == MVT::i32) - return FPTOUINT_F80_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F80_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F80_I128; - } else if (OpVT == MVT::f128) { - if (RetVT == MVT::i32) - return FPTOUINT_F128_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F128_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F128_I128; - } else if (OpVT == MVT::ppcf128) { - if (RetVT == MVT::i32) - return FPTOUINT_PPCF128_I32; - if (RetVT == MVT::i64) - return FPTOUINT_PPCF128_I64; - if (RetVT == MVT::i128) - return FPTOUINT_PPCF128_I128; - } - return UNKNOWN_LIBCALL; -} - -/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::i32) { - if (RetVT == MVT::f32) - return SINTTOFP_I32_F32; - if (RetVT == MVT::f64) - return SINTTOFP_I32_F64; - if (RetVT == MVT::f80) - return SINTTOFP_I32_F80; - if (RetVT == MVT::f128) - return SINTTOFP_I32_F128; - if (RetVT == MVT::ppcf128) - return SINTTOFP_I32_PPCF128; - } else if (OpVT == MVT::i64) { - if (RetVT == MVT::f32) - return SINTTOFP_I64_F32; - if (RetVT == MVT::f64) - return SINTTOFP_I64_F64; - if (RetVT == MVT::f80) - return SINTTOFP_I64_F80; - if (RetVT == MVT::f128) - return SINTTOFP_I64_F128; - if (RetVT == MVT::ppcf128) - return SINTTOFP_I64_PPCF128; - } else if (OpVT == MVT::i128) { - if (RetVT == MVT::f32) - return SINTTOFP_I128_F32; - if (RetVT == MVT::f64) - return SINTTOFP_I128_F64; - if (RetVT == MVT::f80) - return SINTTOFP_I128_F80; - if (RetVT == MVT::f128) - return SINTTOFP_I128_F128; - if (RetVT == MVT::ppcf128) - return SINTTOFP_I128_PPCF128; - } - return UNKNOWN_LIBCALL; -} - -/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::i32) { - if (RetVT == MVT::f32) - return UINTTOFP_I32_F32; - if (RetVT == MVT::f64) - return UINTTOFP_I32_F64; - if (RetVT == MVT::f80) - return UINTTOFP_I32_F80; - if (RetVT == MVT::f128) - return UINTTOFP_I32_F128; - if (RetVT == MVT::ppcf128) - return UINTTOFP_I32_PPCF128; - } else if (OpVT == MVT::i64) { - if (RetVT == MVT::f32) - return UINTTOFP_I64_F32; - if (RetVT == MVT::f64) - return UINTTOFP_I64_F64; - if (RetVT == MVT::f80) - return UINTTOFP_I64_F80; - if (RetVT == MVT::f128) - return UINTTOFP_I64_F128; - if (RetVT == MVT::ppcf128) - return UINTTOFP_I64_PPCF128; - } else if (OpVT == MVT::i128) { - if (RetVT == MVT::f32) - return UINTTOFP_I128_F32; - if (RetVT == MVT::f64) - return UINTTOFP_I128_F64; - if (RetVT == MVT::f80) - return UINTTOFP_I128_F80; - if (RetVT == MVT::f128) - return UINTTOFP_I128_F128; - if (RetVT == MVT::ppcf128) - return UINTTOFP_I128_PPCF128; - } - return UNKNOWN_LIBCALL; -} - /// NOTE: The constructor takes ownership of TLOF. 
TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) @@ -417,56 +197,6 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -/// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, - SmallVectorImpl &Outs, - const TargetLowering &TLI) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg)) - Flags.setInReg(); - - // Propagate extension type if any - if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - Flags.setSExt(); - else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); - } -} - /// getJumpTableEncoding - Return the entry encoding for a jump table in the /// current function. The returned value is a member of the /// MachineJumpTableInfo::JTEntryKind enum. -- cgit v1.1 From 1452d46e0bc5ca6bea77ca85abf9b694e3b6ab84 Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Fri, 11 Jan 2013 22:06:56 +0000 Subject: Update patch for the pad short functions pass for Intel Atom (only). Adds a check for -Oz, changes the code to not re-visit BBs, and skips over DBG_VALUE instrs. Patch by Andy Zhang. 
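For illustration only, the caching scheme amounts to something like the
following standalone sketch. The names Block, Instr, and the Latency field
are invented stand-ins for the MachineBasicBlock/MachineInstr APIs; only
VisitedBBInfo mirrors the struct the patch actually adds:

  #include <map>
  #include <vector>

  struct Instr {
    bool IsReturn, IsCall, IsDebugValue; // hypothetical query results
    unsigned Latency;                    // hypothetical per-instr latency
  };
  struct Block { std::vector<Instr> Instrs; };

  struct VisitedBBInfo {
    bool HasReturn;  // whether the block contains a return
    unsigned Cycles; // cycles until the return (or end of the block)
    VisitedBBInfo(bool HasReturn = false, unsigned Cycles = 0)
        : HasReturn(HasReturn), Cycles(Cycles) {}
  };

  // Cache of previously visited blocks: each block is measured once and the
  // cached result is reused on every later query instead of re-walking it.
  static std::map<const Block*, VisitedBBInfo> VisitedBBs;

  static bool cyclesUntilReturn(const Block *BB, unsigned &Cycles) {
    std::map<const Block*, VisitedBBInfo>::iterator It = VisitedBBs.find(BB);
    if (It != VisitedBBs.end()) { // already visited: reuse the cached result
      Cycles += It->second.Cycles;
      return It->second.HasReturn;
    }

    unsigned CyclesToEnd = 0;
    for (std::vector<Instr>::const_iterator MI = BB->Instrs.begin(),
         ME = BB->Instrs.end(); MI != ME; ++MI) {
      if (MI->IsDebugValue) // DBG_VALUE carries no cost; skip it
        continue;
      if (MI->IsReturn && !MI->IsCall) { // found the early return
        VisitedBBs[BB] = VisitedBBInfo(true, CyclesToEnd);
        Cycles += CyclesToEnd;
        return true;
      }
      CyclesToEnd += MI->Latency;
    }

    VisitedBBs[BB] = VisitedBBInfo(false, CyclesToEnd);
    Cycles += CyclesToEnd;
    return false;
  }

(The real pass keeps the cache as a pass member rather than a global, and
skips trailing DBG_VALUEs separately when locating the return instruction;
the sketch folds both into one walk for brevity.)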
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86PadShortFunction.cpp | 77 ++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index c22872f..83e75ea 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -33,6 +33,19 @@ using namespace llvm; STATISTIC(NumBBsPadded, "Number of basic blocks padded"); namespace { + struct VisitedBBInfo { + // HasReturn - Whether the BB contains a return instruction + bool HasReturn; + + // Cycles - Number of cycles until return if HasReturn is true, otherwise + // number of cycles until end of the BB + unsigned int Cycles; + + VisitedBBInfo() : HasReturn(false), Cycles(0) {} + VisitedBBInfo(bool HasReturn, unsigned int Cycles) + : HasReturn(HasReturn), Cycles(Cycles) {} + }; + struct PadShortFunc : public MachineFunctionPass { static char ID; PadShortFunc() : MachineFunctionPass(ID) @@ -49,16 +62,21 @@ namespace { unsigned int Cycles = 0); bool cyclesUntilReturn(MachineBasicBlock *MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location = 0); + unsigned int &Cycles); void addPadding(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, unsigned int NOOPsToAdd); const unsigned int Threshold; + + // ReturnBBs - Maps basic blocks that return to the minimum number of + // cycles until the return, starting from the entry block. DenseMap ReturnBBs; + // VisitedBBs - Cache of previously visited BBs. + DenseMap VisitedBBs; + const TargetMachine *TM; const TargetInstrInfo *TII; }; @@ -73,25 +91,26 @@ FunctionPass *llvm::createX86PadShortFunctions() { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// NOOP instructions before early exits. bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - bool OptForSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); - - if (OptForSize) + const AttributeSet &FnAttrs = MF.getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize)) { return false; + } TM = &MF.getTarget(); TII = TM->getInstrInfo(); // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); + VisitedBBs.clear(); findReturns(MF.begin()); bool MadeChange = false; - MachineBasicBlock::iterator ReturnLoc; MachineBasicBlock *MBB; unsigned int Cycles = 0; - unsigned int BBCycles; // Pad the identified basic blocks with NOOPs for (DenseMap::iterator I = ReturnBBs.begin(); @@ -100,8 +119,16 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { Cycles = I->second; if (Cycles < Threshold) { - if (!cyclesUntilReturn(MBB, BBCycles, &ReturnLoc)) - continue; + // BB ends in a return. Skip over any DBG_VALUE instructions + // trailing the terminator. 
+ assert(MBB->size() > 0 && + "Basic block should contain at least a RET but is empty"); + MachineBasicBlock::iterator ReturnLoc = --MBB->end(); + + while (ReturnLoc->isDebugValue()) + --ReturnLoc; + assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() && + "Basic block does not end with RET"); addPadding(MBB, ReturnLoc, Threshold - Cycles); NumBBsPadded++; @@ -127,18 +154,30 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) { // Follow branches in BB and look for returns for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(); - I != MBB->succ_end(); ++I) { + I != MBB->succ_end(); ++I) { + if (*I == MBB) + continue; findReturns(*I, Cycles); } } -/// cyclesUntilReturn - if the MBB has a return instruction, set Location -/// to the instruction and return true. Return false otherwise. +/// cyclesUntilReturn - return true if the MBB has a return instruction, +/// and return false otherwise. /// Cycles will be incremented by the number of cycles taken to reach the /// return or the end of the BB, whichever occurs first. bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location) { + unsigned int &Cycles) { + // Return cached result if BB was previously visited + DenseMap::iterator it + = VisitedBBs.find(MBB); + if (it != VisitedBBs.end()) { + VisitedBBInfo BBInfo = it->second; + Cycles += BBInfo.Cycles; + return BBInfo.HasReturn; + } + + unsigned int CyclesToEnd = 0; + for (MachineBasicBlock::iterator MBBI = MBB->begin(); MBBI != MBB->end(); ++MBBI) { MachineInstr *MI = MBBI; @@ -146,14 +185,16 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, // functions do not count because the called function will be padded, // if necessary. if (MI->isReturn() && !MI->isCall()) { - if (Location) - *Location = MBBI; + VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd); + Cycles += CyclesToEnd; return true; } - Cycles += TII->getInstrLatency(TM->getInstrItineraryData(), MI); + CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI); } + VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd); + Cycles += CyclesToEnd; return false; } -- cgit v1.1 From 9b1bb05386b796eef2438112b2e87ac98409d01a Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 11 Jan 2013 22:55:28 +0000 Subject: Proof of concept moving of generic directive parsing from AsmParser to the GenericAsmParser extension, where a lot of directives are already being parsed. The end goal is having just a single place (and a single lookup table) for all directive parsing. 
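As a rough sketch, the end state looks something like this. This is
illustrative only: the DirectiveParser class below is invented and is not
the actual MC parser interface, though it follows the MC convention of
returning true on failure:

  #include <map>
  #include <string>

  class DirectiveParser {
    typedef bool (DirectiveParser::*Handler)(const std::string &Directive);
    // The single lookup table: one entry per directive spelling.
    std::map<std::string, Handler> Handlers;
  public:
    DirectiveParser() {
      Handlers[".space"] = &DirectiveParser::parseSpace;
      Handlers[".skip"] = &DirectiveParser::parseSpace; // alias of .space
    }

    // Every directive is dispatched the same way, through the table.
    bool parseDirective(const std::string &Directive) {
      std::map<std::string, Handler>::iterator It = Handlers.find(Directive);
      if (It == Handlers.end())
        return true; // unknown directive
      Handler H = It->second;
      return (this->*H)(Directive);
    }

  private:
    bool parseSpace(const std::string &) {
      // ... parse the byte count and fill value, then emit the fill ...
      return false;
    }
  };

With one table there is exactly one place to register a directive and one
place that dispatches it, which is what this series is working toward.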
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 79 +++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index ecf35ff..bc3a366 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -297,8 +297,7 @@ private: DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT, DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_SINGLE, DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW, - DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, - DK_SPACE, DK_SKIP, DK_ENDR, + DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR, DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK, DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_INDIRECT_SYMBOL, DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN, @@ -316,7 +315,6 @@ private: bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ... bool ParseDirectiveFill(); // ".fill" - bool ParseDirectiveSpace(); // ".space" bool ParseDirectiveZero(); // ".zero" // ".set", ".equ", ".equiv" bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); @@ -399,6 +397,9 @@ public: AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveSpace>(".space"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveSpace>(".skip"); + // CFI directives. AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFISections>( ".cfi_sections"); @@ -459,6 +460,7 @@ public: bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveSpace(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFISections(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc); @@ -1343,9 +1345,6 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return ParseDirectiveOrg(); case DK_FILL: return ParseDirectiveFill(); - case DK_SPACE: - case DK_SKIP: - return ParseDirectiveSpace(); case DK_ZERO: return ParseDirectiveZero(); case DK_EXTERN: @@ -2243,39 +2242,6 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { return false; } -/// ParseDirectiveSpace -/// ::= .space expression [ , expression ] -bool AsmParser::ParseDirectiveSpace() { - CheckForValidSection(); - - int64_t NumBytes; - if (ParseAbsoluteExpression(NumBytes)) - return true; - - int64_t FillExpr = 0; - if (getLexer().isNot(AsmToken::EndOfStatement)) { - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in '.space' directive"); - Lex(); - - if (ParseAbsoluteExpression(FillExpr)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.space' directive"); - } - - Lex(); - - if (NumBytes <= 0) - return TokError("invalid number of bytes in '.space' directive"); - - // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. 
- getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); - - return false; -} - /// ParseDirectiveZero /// ::= .zero expression bool AsmParser::ParseDirectiveZero() { @@ -2920,8 +2886,6 @@ void AsmParser::initializeDirectiveKindMapping() { DirectiveKindMapping[".p2alignl"] = DK_P2ALIGNL; DirectiveKindMapping[".org"] = DK_ORG; DirectiveKindMapping[".fill"] = DK_FILL; - DirectiveKindMapping[".space"] = DK_SPACE; - DirectiveKindMapping[".skip"] = DK_SKIP; DirectiveKindMapping[".zero"] = DK_ZERO; DirectiveKindMapping[".extern"] = DK_EXTERN; DirectiveKindMapping[".globl"] = DK_GLOBL; @@ -3151,6 +3115,39 @@ bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive, return TokError("unsupported directive '" + Directive + "'"); } +/// ParseDirectiveSpace +/// ::= .space expression [ , expression ] +bool GenericAsmParser::ParseDirectiveSpace(StringRef, SMLoc DirectiveLoc) { + getParser().CheckForValidSection(); + + int64_t NumBytes; + if (getParser().ParseAbsoluteExpression(NumBytes)) + return true; + + int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.space' directive"); + Lex(); + + if (getParser().ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.space' directive"); + } + + Lex(); + + if (NumBytes <= 0) + return TokError("invalid number of bytes in '.space' directive"); + + // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + + return false; +} + /// ParseDirectiveCFISections /// ::= .cfi_sections section [, section] bool GenericAsmParser::ParseDirectiveCFISections(StringRef, -- cgit v1.1 From 66de2af815f97e484c1940ff157ffbb809931b20 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 11 Jan 2013 22:57:48 +0000 Subject: PPC: Implement efficient lowering of sign_extend_inreg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index de6bbe3..2dade85 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -61,6 +61,8 @@ class VectorLegalizer { // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if // SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); + // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + SDValue ExpandSEXTINREG(SDValue Op); // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. 
SDValue ExpandVSELECT(SDValue Op); @@ -262,7 +264,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::VSELECT) + if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) + Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); else if (Node->getOpcode() == ISD::SELECT) Result = ExpandSELECT(Op); @@ -501,6 +505,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } +SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { + EVT VT = Op.getValueType(); + + // Make sure that the SRA and SRL instructions are available. + if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + DebugLoc DL = Op.getDebugLoc(); + EVT OrigTy = cast(Op->getOperand(1))->getVT(); + + unsigned BW = VT.getScalarType().getSizeInBits(); + unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + + Op = Op.getOperand(0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, ShiftSz); + return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. -- cgit v1.1 From dcf669560e2ae00c5392474d10aa758a51c609a8 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 11 Jan 2013 23:08:52 +0000 Subject: Fixed whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172271 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 122 +++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 61 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 1562d72..de63354 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -448,8 +448,8 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I, TLI)) - break; + if (isAllocationFn(I, TLI)) + break; Instruction *J = dyn_cast(I->getOperand(0)); if (!J) break; @@ -2589,18 +2589,18 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (StoreInst *SI = dyn_cast(CurInst)) { if (!SI->isSimple()) { - DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); - return false; // no volatile/atomic accesses. + DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(SI->getOperand(1)); if (ConstantExpr *CE = dyn_cast(Ptr)) { - DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); Ptr = ConstantFoldConstantExpression(CE, TD, TLI); - DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } if (!isSimpleEnoughPointerToCommit(Ptr)) { // If this is too complex for us to commit, reject it. - DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); + DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); return false; } @@ -2609,14 +2609,14 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. 
if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) { - DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val - << "\n"); + DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val + << "\n"); return false; } if (ConstantExpr *CE = dyn_cast(Ptr)) { if (CE->getOpcode() == Instruction::BitCast) { - DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); + DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the // stored value. @@ -2645,8 +2645,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If we can't improve the situation by introspecting NewTy, // we have to give up. } else { - DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); + DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); return false; } } @@ -2655,7 +2655,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // onto the stored value. Val = ConstantExpr::getBitCast(Val, NewTy); - DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); + DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); } } @@ -2665,25 +2665,25 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, getVal(BO->getOperand(0)), getVal(BO->getOperand(1))); DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult - << "\n"); + << "\n"); } else if (CmpInst *CI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getCompare(CI->getPredicate(), getVal(CI->getOperand(0)), getVal(CI->getOperand(1))); DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult - << "\n"); + << "\n"); } else if (CastInst *CI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getCast(CI->getOpcode(), getVal(CI->getOperand(0)), CI->getType()); DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult - << "\n"); + << "\n"); } else if (SelectInst *SI = dyn_cast(CurInst)) { InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), getVal(SI->getOperand(1)), getVal(SI->getOperand(2))); DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult - << "\n"); + << "\n"); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector GEPOps; @@ -2694,32 +2694,32 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, ConstantExpr::getGetElementPtr(P, GEPOps, cast(GEP)->isInBounds()); DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult - << "\n"); + << "\n"); } else if (LoadInst *LI = dyn_cast(CurInst)) { if (!LI->isSimple()) { - DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); - return false; // no volatile/atomic accesses. + DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(LI->getOperand(0)); if (ConstantExpr *CE = dyn_cast(Ptr)) { Ptr = ConstantFoldConstantExpression(CE, TD, TLI); - DEBUG(dbgs() << "Found a constant pointer expression, constant " - "folding: " << *Ptr << "\n"); + DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " << *Ptr << "\n"); } InstResult = ComputeLoadResult(Ptr); if (InstResult == 0) { - DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." - "\n"); - return false; // Could not evaluate load. + DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. 
} DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); } else if (AllocaInst *AI = dyn_cast(CurInst)) { if (AI->isArrayAllocation()) { - DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); - return false; // Cannot handle array allocs. + DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. } Type *Ty = AI->getType()->getElementType(); AllocaTmps.push_back(new GlobalVariable(Ty, false, @@ -2733,30 +2733,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // Debug info can safely be ignored here. if (isa(CS.getInstruction())) { - DEBUG(dbgs() << "Ignoring debug info.\n"); + DEBUG(dbgs() << "Ignoring debug info.\n"); ++CurInst; continue; } // Cannot handle inline asm. if (isa(CS.getCalledValue())) { - DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); - return false; + DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; } if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { if (MemSetInst *MSI = dyn_cast(II)) { if (MSI->isVolatile()) { - DEBUG(dbgs() << "Can not optimize a volatile memset " << - "intrinsic.\n"); - return false; - } + DEBUG(dbgs() << "Can not optimize a volatile memset " << + "intrinsic.\n"); + return false; + } Constant *Ptr = getVal(MSI->getDest()); Constant *Val = getVal(MSI->getValue()); Constant *DestVal = ComputeLoadResult(getVal(Ptr)); if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { // This memset is a no-op. - DEBUG(dbgs() << "Ignoring no-op memset.\n"); + DEBUG(dbgs() << "Ignoring no-op memset.\n"); ++CurInst; continue; } @@ -2764,7 +2764,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end) { - DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); ++CurInst; continue; } @@ -2773,9 +2773,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // We don't insert an entry into Values, as it doesn't have a // meaningful return value. if (!II->use_empty()) { - DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n"); + DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n"); return false; - } + } ConstantInt *Size = cast(II->getArgOperand(0)); Value *PtrArg = getVal(II->getArgOperand(1)); Value *Ptr = PtrArg->stripPointerCasts(); @@ -2785,26 +2785,26 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Size->getValue().getLimitedValue() >= TD->getTypeStoreSize(ElemTy)) { Invariants.insert(GV); - DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV - << "\n"); - } else { - DEBUG(dbgs() << "Found a global var, but can not treat it as an " - "invariant.\n"); - } + DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV + << "\n"); + } else { + DEBUG(dbgs() << "Found a global var, but can not treat it as an " + "invariant.\n"); + } } // Continue even if we do nothing. ++CurInst; continue; } - DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); + DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); return false; } // Resolve function pointers. Function *Callee = dyn_cast(getVal(CS.getCalledValue())); if (!Callee || Callee->mayBeOverridden()) { - DEBUG(dbgs() << "Can not resolve function pointer.\n"); + DEBUG(dbgs() << "Can not resolve function pointer.\n"); return false; // Cannot resolve. 
} @@ -2816,34 +2816,34 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; - DEBUG(dbgs() << "Constant folded function call. Result: " << - *InstResult << "\n"); + DEBUG(dbgs() << "Constant folded function call. Result: " << + *InstResult << "\n"); } else { - DEBUG(dbgs() << "Can not constant fold function call.\n"); + DEBUG(dbgs() << "Can not constant fold function call.\n"); return false; } } else { if (Callee->getFunctionType()->isVarArg()) { - DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); + DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); return false; - } + } Constant *RetVal; // Execute the call, if successful, use the return value. ValueStack.push_back(new DenseMap); if (!EvaluateFunction(Callee, RetVal, Formals)) { - DEBUG(dbgs() << "Failed to evaluate function.\n"); + DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; - } + } delete ValueStack.pop_back_val(); InstResult = RetVal; - if (InstResult != NULL) { - DEBUG(dbgs() << "Successfully evaluated function. Result: " << - InstResult << "\n\n"); - } else { - DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); - } + if (InstResult != NULL) { + DEBUG(dbgs() << "Successfully evaluated function. Result: " << + InstResult << "\n\n"); + } else { + DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + } } } else if (isa(CurInst)) { DEBUG(dbgs() << "Found a terminator instruction.\n"); @@ -2873,7 +2873,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, NextBB = 0; } else { // invoke, unwind, resume, unreachable. - DEBUG(dbgs() << "Can not handle terminator."); + DEBUG(dbgs() << "Can not handle terminator."); return false; // Cannot handle this terminator. } @@ -2883,7 +2883,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } else { // Did not know how to evaluate this! DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." - "\n"); + "\n"); return false; } -- cgit v1.1 From bf706b3f9987a88493dea4f206f46e8062eedd0b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Sat, 12 Jan 2013 00:05:00 +0000 Subject: Make ParseIdentifier a public method instead of private. The MCAsmParser interface defines ParseIdentifier is public. There's no reason whatsoever for AsmParser (which implements the MCAsmParser interface) to hide this method. This is all part of a bigger scheme. Several asm parsing "extensions" use the main parser properly through the MCAsmParser interface. However, GenericAsmParser has much more exclusive access and uses implementation details from the concrete implementation - AsmParser, in which it is also declared as a friend. This makes for overly coupled code, and even makes it hard to split GenericAsmParser into a separate file. There's no reason why GenericAsmParser shouldn't be able to access AsmParser through an abstract interface, as long as it's actually registered as an extension. 
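To make the intent concrete, here is a minimal sketch of an extension written purely against the abstract interface (MyDirectiveHandler and the ".widget" directive are invented for illustration; ParseIdentifier and EatToEndOfStatement are the interface methods this pair of patches exposes):

  // Sketch only: an extension that needs no friend access to AsmParser,
  // because everything it touches is public on the MCAsmParser interface.
  class MyDirectiveHandler {
    MCAsmParser &Parser; // abstract interface, not the concrete AsmParser
  public:
    explicit MyDirectiveHandler(MCAsmParser &P) : Parser(P) {}

    bool ParseDirectiveWidget() {
      StringRef Name;
      if (Parser.ParseIdentifier(Name))   // made public in this patch
        return Parser.TokError("expected identifier after '.widget'");
      // ... act on Name here ...
      Parser.EatToEndOfStatement();       // made public in r172277 below
      return false;
    }
  };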
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172276 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index bc3a366..2aecb0c 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -226,6 +226,10 @@ public: virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool ParseAbsoluteExpression(int64_t &Res); + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// and set \p Res to the identifier contents. + virtual bool ParseIdentifier(StringRef &Res); + /// } private: @@ -286,10 +290,6 @@ private: bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); - /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) - /// and set \p Res to the identifier contents. - virtual bool ParseIdentifier(StringRef &Res); - // Directive Parsing. enum DirectiveKind { -- cgit v1.1 From b2f0b595a3aac4da1265cfa2f7a53baaa229328f Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Sat, 12 Jan 2013 00:23:24 +0000 Subject: Stop hiding the interface-exposed EatToEndOfStatement (see r172276). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172277 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2aecb0c..50579a6 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -229,6 +229,7 @@ public: /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. virtual bool ParseIdentifier(StringRef &Res); + virtual void EatToEndOfStatement(); /// } @@ -267,8 +268,6 @@ private: /// location. void JumpToLoc(SMLoc Loc, int InBuffer=-1); - virtual void EatToEndOfStatement(); - bool ParseMacroArgument(MacroArgument &MA, AsmToken::TokenKind &ArgumentDelimiter); bool ParseMacroArguments(const Macro *M, MacroArguments &A); -- cgit v1.1 From c7a275245f501e2f68a55af05c75bc9b6b50ec84 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 12 Jan 2013 00:54:59 +0000 Subject: Precompute some information about register costs. Remember the minimum cost of the registers in an allocation order and the number of registers at the end of the allocation order that have the same cost per use. This information can be used to limit the search space for RAGreedy::tryEvict() when looking for a cheaper register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172280 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegisterClassInfo.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 078a0df..87382d8 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -83,6 +83,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { unsigned N = 0; SmallVector CSRAlias; + unsigned MinCost = 0xff; + unsigned LastCost = ~0u; + unsigned LastCostChange = 0; // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. 
@@ -92,17 +95,31 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; + unsigned Cost = TRI->getCostPerUse(PhysReg); + MinCost = std::min(MinCost, Cost); + if (CSRNum[PhysReg]) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); - else + else { + if (Cost != LastCost) + LastCostChange = N; RCI.Order[N++] = PhysReg; + LastCost = Cost; + } } RCI.NumRegs = N + CSRAlias.size(); assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); // CSR aliases go after the volatile registers, preserve the target's order. - std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { + unsigned PhysReg = CSRAlias[i]; + unsigned Cost = TRI->getCostPerUse(PhysReg); + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } // Register allocator stress test. Clip register class to N registers. if (StressRA && RCI.NumRegs > StressRA) @@ -113,6 +130,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; + RCI.MinCost = uint8_t(MinCost); + RCI.LastCostChange = LastCostChange; + DEBUG({ dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) -- cgit v1.1 From 6d6132986d2ef14bbf9d76f5acbf2a0bace32d69 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 12 Jan 2013 00:57:44 +0000 Subject: Limit the search space in RAGreedy::tryEvict(). When tryEvict() is looking for a cheaper register in the allocation order, skip the tail of too expensive registers when possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172281 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AllocationOrder.h | 15 +++++++++++++++ lib/CodeGen/RegAllocGreedy.cpp | 19 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index a5293f6..aed461a 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -39,6 +39,9 @@ public: const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo); + /// Get the allocation order without reordered hints. + ArrayRef getOrder() const { return Order; } + /// Return the next physical register in the allocation order, or 0. /// It is safe to call next() again after it returned 0, it will keep /// returning 0 until rewind() is called. @@ -53,6 +56,18 @@ public: return 0; } + /// As next(), but allow duplicates to be returned, and stop before the + /// Limit'th register in the RegisterClassInfo allocation order. + /// + /// This can produce more than Limit registers if there are hints. + unsigned nextWithDups(unsigned Limit) { + if (Pos < 0) + return Hints.end()[Pos++]; + if (Pos < int(Limit)) + return Order[Pos++]; + return 0; + } + /// Start over from the beginning. void rewind() { Pos = -int(Hints.size()); } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 1884452..6344a73 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -632,16 +632,33 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Keep track of the cheapest interference seen so far. 
EvictionCost BestCost(~0u); unsigned BestPhys = 0; + unsigned OrderLimit = Order.getOrder().size(); // When we are just looking for a reduced cost per use, don't break any // hints, and only evict smaller spill weights. if (CostPerUseLimit < ~0u) { BestCost.BrokenHints = 0; BestCost.MaxWeight = VirtReg.weight; + + // Check of any registers in RC are below CostPerUseLimit. + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); + unsigned MinCost = RegClassInfo.getMinCost(RC); + if (MinCost >= CostPerUseLimit) { + DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + << ", no cheaper registers to be found.\n"); + return 0; + } + + // It is normal for register classes to have a long tail of registers with + // the same cost. We don't need to look at them if they're too expensive. + if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + OrderLimit = RegClassInfo.getLastCostChange(RC); + DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); + } } Order.rewind(); - while (unsigned PhysReg = Order.next()) { + while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. -- cgit v1.1 From ec3199f675b17b12fd779df557c6bff25aa4e862 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Sat, 12 Jan 2013 01:03:14 +0000 Subject: This patch tackles the problem of parsing Mips register names in the standalone assembler llvm-mc. Registers such as $A1 can represent either a 32 or 64 bit register based on the instruction using it. In addition, based on the abi, $T0 can represent different 32 bit registers. The problem is resolved by the Mips specific AsmParser td definitions changing to work together. Many cases of RegisterClass parameters are now RegisterOperand. 
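As a rough sketch of the mechanism (a hand-written approximation of what TableGen emits from the ParserMethod fields in the .td changes below; the dispatch function and MCK_* names are simplified, not code from this patch): the matcher dispatches on the operand class the instruction demands, so the same register spelling can resolve to either width.

  // Approximation of the generated custom-operand dispatch. An
  // instruction whose operand is CPU64RegsOpnd routes "$a1" through
  // parseCPU64Regs and gets the 64-bit A1_64; one taking CPURegsOpnd
  // routes the same token through parseCPURegs and gets A1.
  MipsAsmParser::OperandMatchResultTy
  MipsAsmParser::tryCustomParseOperand(
      SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned MCK) {
    switch (MCK) {
    case MCK_CPURegsAsm:   return parseCPURegs(Operands);   // 32-bit view
    case MCK_CPU64RegsAsm: return parseCPU64Regs(Operands); // 64-bit view
    case MCK_HWRegsAsm:    return parseHWRegs(Operands);
    default:               return MatchOperand_NoMatch;
    }
  }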
Contributer: Vladimir Medic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172284 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 330 ++++++++++++++++++------ lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 + lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 1 + lib/Target/Mips/Mips64InstrInfo.td | 117 +++++---- lib/Target/Mips/MipsInstrFPU.td | 16 +- lib/Target/Mips/MipsInstrInfo.td | 219 ++++++++-------- lib/Target/Mips/MipsRegisterInfo.td | 45 ++++ 7 files changed, 496 insertions(+), 237 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 085503eb..41df9d4 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -84,15 +84,30 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseDirective(AsmToken DirectiveID); MipsAsmParser::OperandMatchResultTy - parseMemOperand(SmallVectorImpl&); + parseMemOperand(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCPURegs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCPU64Regs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHWRegs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHW64Regs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCCRRegs(SmallVectorImpl &Operands); bool ParseOperand(SmallVectorImpl &, StringRef Mnemonic); - int tryParseRegister(StringRef Mnemonic); + int tryParseRegister(bool is64BitReg); bool tryParseRegisterOperand(SmallVectorImpl &Operands, - StringRef Mnemonic); + bool is64BitReg); bool needsExpansion(MCInst &Inst); @@ -107,7 +122,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool reportParseError(StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res); - bool parseRelocOperand(const MCExpr *&Res, SMLoc &E); + bool parseRelocOperand(const MCExpr *&Res); bool parseDirectiveSet(); @@ -128,9 +143,9 @@ class MipsAsmParser : public MCTargetAsmParser { return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; } - int matchRegisterName(StringRef Symbol); + int matchRegisterName(StringRef Symbol, bool is64BitReg); - int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic); + int matchRegisterByNumber(unsigned RegNum, unsigned RegClass); void setFpFormat(FpFormatTy Format) { FpFormat = Format; @@ -166,6 +181,20 @@ namespace { /// instruction. 
class MipsOperand : public MCParsedAsmOperand { +public: + enum RegisterKind { + Kind_None, + Kind_CPURegs, + Kind_CPU64Regs, + Kind_HWRegs, + Kind_HW64Regs, + Kind_FGR32Regs, + Kind_FGR64Regs, + Kind_AFGR32Regs, + Kind_CCRRegs + }; + +private: enum KindTy { k_CondCode, k_CoprocNum, @@ -186,6 +215,7 @@ class MipsOperand : public MCParsedAsmOperand { struct { unsigned RegNum; + RegisterKind Kind; } Reg; struct { @@ -246,6 +276,11 @@ public: return Reg.RegNum; } + void setRegKind(RegisterKind RegKind) { + assert((Kind == k_Register) && "Invalid access!"); + Reg.Kind = RegKind; + } + const MCExpr *getImm() const { assert((Kind == k_Immediate) && "Invalid access!"); return Imm.Val; @@ -296,6 +331,45 @@ public: return Op; } + bool isCPURegsAsm() const { + return Reg.Kind == Kind_CPURegs; + } + void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isCPU64RegsAsm() const { + return Reg.Kind == Kind_CPU64Regs; + } + void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isHWRegsAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_HWRegs; + } + void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isHW64RegsAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_HW64Regs; + } + void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + void addCCRAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isCCRAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_CCRRegs; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -344,31 +418,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, if ( 0 <= ImmValue && ImmValue <= 65535) { // for 0 <= j <= 65535. // li d,j => ori d,$zero,j - tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else if ( ImmValue < 0 && ImmValue >= -32768) { // for -32768 <= j < 0. // li d,j => addiu d,$zero,j - tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { // for any other value of j that is representable as a 32-bit integer. // li d,j => lui d,hi16(j) // ori d,d,lo16(j) - tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64() ? 
Mips::ORi64 : Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -390,7 +464,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, if ( -32768 <= ImmValue && ImmValue <= 65535) { //for -32768 <= j <= 65535. //la d,j(s) => addiu d,s,j - tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); @@ -400,12 +474,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, //la d,j(s) => lui d,hi16(j) // ori d,d,lo16(j) // addu d,d,s - tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -433,19 +507,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { //for any other value of j that is representable as a 32-bit integer. 
//la d,j => lui d,hi16(j) // ori d,d,lo16(j) - tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -498,10 +572,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } -int MipsAsmParser::matchRegisterName(StringRef Name) { +int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { int CC; - if (!isMips64()) + if (!is64BitReg) CC = StringSwitch(Name) .Case("zero", Mips::ZERO) .Case("a0", Mips::A0) @@ -643,7 +717,7 @@ unsigned MipsAsmParser::getATReg() { unsigned Reg = Options.getATRegNum(); if (isMips64()) return getReg(Mips::CPU64RegsRegClassID,Reg); - + return getReg(Mips::CPURegsRegClassID,Reg); } @@ -651,63 +725,41 @@ unsigned MipsAsmParser::getReg(int RC,int RegNo) { return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); } -int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) { - - if (Mnemonic.lower() == "rdhwr") { - // at the moment only hwreg29 is supported - if (RegNum != 29) - return -1; - return Mips::HWR29; - } +int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) { if (RegNum > 31) return -1; - // MIPS64 registers are numbered 1 after the 32-bit equivalents - return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64(); + return getReg(RegClass, RegNum); } -int MipsAsmParser::tryParseRegister(StringRef Mnemonic) { +int MipsAsmParser::tryParseRegister(bool is64BitReg) { const AsmToken &Tok = Parser.getTok(); int RegNum = -1; if (Tok.is(AsmToken::Identifier)) { std::string lowerCase = Tok.getString().lower(); - RegNum = matchRegisterName(lowerCase); + RegNum = matchRegisterName(lowerCase, is64BitReg); } else if (Tok.is(AsmToken::Integer)) RegNum = matchRegisterByNumber(static_cast(Tok.getIntVal()), - Mnemonic.lower()); - else - return RegNum; //error - // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64 - if (isMips64() && RegNum == Mips::ZERO_64) { - if (Mnemonic.find("ddiv") != StringRef::npos) - RegNum = Mips::ZERO; - } + is64BitReg ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); return RegNum; } bool MipsAsmParser:: tryParseRegisterOperand(SmallVectorImpl &Operands, - StringRef Mnemonic){ + bool is64BitReg){ SMLoc S = Parser.getTok().getLoc(); - SMLoc E = Parser.getTok().getEndLoc(); int RegNo = -1; - // FIXME: we should make a more generic method for CCR - if ((Mnemonic == "cfc1" || Mnemonic == "ctc1") - && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){ - RegNo = Parser.getTok().getIntVal(); // get the int value - // at the moment only fcc0 is supported - if (RegNo == 0) - RegNo = Mips::FCC0; - } else - RegNo = tryParseRegister(Mnemonic); + RegNo = tryParseRegister(is64BitReg); if (RegNo == -1) return true; - Operands.push_back(MipsOperand::CreateReg(RegNo, S, E)); + Operands.push_back(MipsOperand::CreateReg(RegNo, S, + Parser.getTok().getLoc())); Parser.Lex(); // Eat register token. return false; } @@ -734,7 +786,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat dollar token. 
// parse register operand - if (!tryParseRegisterOperand(Operands, Mnemonic)) { + if (!tryParseRegisterOperand(Operands, isMips64())) { if (getLexer().is(AsmToken::LParen)) { // check if it is indexed addressing operand Operands.push_back(MipsOperand::CreateToken("(", S)); @@ -743,7 +795,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, return true; Parser.Lex(); // eat dollar - if (tryParseRegisterOperand(Operands, Mnemonic)) + if (tryParseRegisterOperand(Operands, isMips64())) return true; if (!getLexer().is(AsmToken::RParen)) @@ -760,7 +812,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, if (Parser.ParseIdentifier(Identifier)) return true; - SMLoc E = SMLoc::getFromPointer(Identifier.end()); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); @@ -780,9 +832,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, // quoted label names const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); - SMLoc E; - if (getParser().ParseExpression(IdVal, E)) + if (getParser().ParseExpression(IdVal)) return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return false; } @@ -790,10 +842,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, // it is a symbol reference or constant expression const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); // start location of the operand - SMLoc E; - if (parseRelocOperand(IdVal, E)) + if (parseRelocOperand(IdVal)) return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return false; } // case AsmToken::Percent @@ -801,7 +854,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, return true; } -bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { Parser.Lex(); // eat % token const AsmToken &Tok = Parser.getTok(); // get next token, operation @@ -813,6 +866,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { Parser.Lex(); // eat identifier // now make expression from the rest of the operand const MCExpr *IdVal; + SMLoc EndLoc; if (getLexer().getKind() == AsmToken::LParen) { while (1) { @@ -833,10 +887,8 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { if (getParser().ParseParenExpression(IdVal,EndLoc)) return true; - while (getLexer().getKind() == AsmToken::RParen) { - EndLoc = Parser.getTok().getEndLoc(); + while (getLexer().getKind() == AsmToken::RParen) Parser.Lex(); // eat ')' token - } } else return true; // parenthesis must follow reloc operand @@ -868,23 +920,24 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); - EndLoc = Parser.getTok().getEndLoc(); - RegNo = tryParseRegister(""); + RegNo = tryParseRegister(isMips64()); + EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { + + SMLoc S; + switch(getLexer().getKind()) { default: return true; case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: - return getParser().ParseExpression(Res); - case AsmToken::Percent: { - SMLoc E; - return parseRelocOperand(Res, E); - } + return (getParser().ParseExpression(Res)); + case AsmToken::Percent: + return parseRelocOperand(Res); case 
AsmToken::LParen: return false; // it's probably assuming 0 } @@ -895,8 +948,9 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( SmallVectorImpl&Operands) { const MCExpr *IdVal = 0; - SMLoc S = Parser.getTok().getLoc(); - SMLoc E = Parser.getTok().getEndLoc(); + SMLoc S; + // first operand is the offset + S = Parser.getTok().getLoc(); if (parseMemOffset(IdVal)) return MatchOperand_ParseFail; @@ -905,6 +959,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( if (Tok.isNot(AsmToken::LParen)) { MipsOperand *Mnemonic = static_cast(Operands[0]); if (Mnemonic->getToken() == "la") { + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return MatchOperand_Success; } @@ -917,7 +972,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( const AsmToken &Tok1 = Parser.getTok(); // get next token if (Tok1.is(AsmToken::Dollar)) { Parser.Lex(); // Eat '$' token. - if (tryParseRegisterOperand(Operands,"")) { + if (tryParseRegisterOperand(Operands, isMips64())) { Error(Parser.getTok().getLoc(), "unexpected token in operand"); return MatchOperand_ParseFail; } @@ -933,7 +988,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_ParseFail; } - E = Parser.getTok().getEndLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Parser.Lex(); // Eat ')' token. if (IdVal == 0) @@ -950,6 +1006,126 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_Success; } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPU64Regs(SmallVectorImpl &Operands) { + + if (!isMips64()) + return MatchOperand_NoMatch; + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat $ + if(!tryParseRegisterOperand(Operands, true)) { + // set the proper register kind + MipsOperand* op = static_cast(Operands.back()); + op->setRegKind(MipsOperand::Kind_CPU64Regs); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPURegs(SmallVectorImpl &Operands) { + + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat $ + if(!tryParseRegisterOperand(Operands, false)) { + // set the propper register kind + MipsOperand* op = static_cast(Operands.back()); + op->setRegKind(MipsOperand::Kind_CPURegs); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) { + + // if the first token is not '$' we have error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned RegNum = Tok.getIntVal(); + // at the moment only hwreg29 is supported + if (RegNum != 29) + return MatchOperand_ParseFail; + + MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_HWRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) { + //if 
the first token is not '$' we have error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned RegNum = Tok.getIntVal(); + // at the moment only hwreg29 is supported + if (RegNum != 29) + return MatchOperand_ParseFail; + + MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_HWRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCCRRegs(SmallVectorImpl &Operands) { + unsigned RegNum; + //if the first token is not '$' we have error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.is(AsmToken::Integer)) { + RegNum = Tok.getIntVal(); + // at the moment only fcc0 is supported + if (RegNum != 0) + return MatchOperand_ParseFail; + } else if (Tok.is(AsmToken::Identifier)) { + // at the moment only fcc0 is supported + if (Tok.getIdentifier() != "fcc0") + return MatchOperand_ParseFail; + } else + return MatchOperand_NoMatch; + + MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_CCRRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { MCSymbolRefExpr::VariantKind VK @@ -1083,8 +1259,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (Cc == -1) { return Error(NameLoc, "Invalid conditional code"); } - // FIXME: May include trailing whitespace... 
- SMLoc E = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() -1 ); Operands.push_back(MipsOperand::CreateImm( MCConstantExpr::Create(Cc, getContext()), NameLoc, E)); } else { diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 68d3ac5..97c367f 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -149,6 +149,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { OS << ')'; } +void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printRegName(O, MI->getOperand(OpNo).getReg()); +} + void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 3d8a6f9..38cac68 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -87,6 +87,7 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O); private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index cdf12c8..bbeb649 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -71,52 +71,54 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], //===----------------------------------------------------------------------===// let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) -def DADDi : ArithLogicI<"daddi", simm16_64, CPU64Regs>, ADDI_FM<0x18>; -def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64Regs, immSExt16, add>, +def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>; +def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>, ADDI_FM<0x19>, IsAsCheapAsAMove; -def DANDi : ArithLogicI<"andi", uimm16_64, CPU64Regs, immZExt16, and>, +def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>, ADDI_FM<0xc>; def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>, SLTI_FM<0xa>; def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>, SLTI_FM<0xb>; -def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64Regs, immZExt16, or>, +def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>, ADDI_FM<0xd>; -def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64Regs, immZExt16, xor>, +def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def DADD : ArithLogicR<"dadd", CPU64Regs>, ADD_FM<0, 0x2c>; -def DADDu : ArithLogicR<"daddu", CPU64Regs, 1, IIAlu, add>, ADD_FM<0, 0x2d>; -def DSUBu : ArithLogicR<"dsubu", CPU64Regs, 0, IIAlu, sub>, ADD_FM<0, 0x2f>; +def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>; +def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>, + ADD_FM<0, 0x2d>; +def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>, + ADD_FM<0, 0x2f>; def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>; def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>; -def AND64 : ArithLogicR<"and", CPU64Regs, 
1, IIAlu, and>, ADD_FM<0, 0x24>; -def OR64 : ArithLogicR<"or", CPU64Regs, 1, IIAlu, or>, ADD_FM<0, 0x25>; -def XOR64 : ArithLogicR<"xor", CPU64Regs, 1, IIAlu, xor>, ADD_FM<0, 0x26>; -def NOR64 : LogicNOR<"nor", CPU64Regs>, ADD_FM<0, 0x27>; +def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; +def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; +def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; +def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def DSLL : shift_rotate_imm<"dsll", shamt, CPU64Regs, shl, immZExt6>, +def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>, SRA_FM<0x38, 0>; -def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64Regs, srl, immZExt6>, +def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>, SRA_FM<0x3a, 0>; -def DSRA : shift_rotate_imm<"dsra", shamt, CPU64Regs, sra, immZExt6>, +def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>, SRA_FM<0x3b, 0>; -def DSLLV : shift_rotate_reg<"dsllv", CPU64Regs, shl>, SRLV_FM<0x14, 0>; -def DSRLV : shift_rotate_reg<"dsrlv", CPU64Regs, srl>, SRLV_FM<0x16, 0>; -def DSRAV : shift_rotate_reg<"dsrav", CPU64Regs, sra>, SRLV_FM<0x17, 0>; -def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64Regs>, SRA_FM<0x3c, 0>; -def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64Regs>, SRA_FM<0x3e, 0>; -def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64Regs>, SRA_FM<0x3f, 0>; +def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>; +def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>; +def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>; +def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>; +def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>; +def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>; } // Rotate Instructions let Predicates = [HasMips64r2, HasStdEnc], DecoderNamespace = "Mips64" in { - def DROTR : shift_rotate_imm<"drotr", shamt, CPU64Regs, rotr, immZExt6>, + def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>, SRA_FM<0x3a, 1>; - def DROTRV : shift_rotate_reg<"drotrv", CPU64Regs, rotr>, SRLV_FM<0x16, 1>; + def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>, SRLV_FM<0x16, 1>; } let DecoderNamespace = "Mips64" in { @@ -135,12 +137,11 @@ defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>; defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>; /// load/store left/right -let isCodeGenOnly = 1 in { - defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>; - defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>; - defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>; - defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>; -} +defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>; +defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>; +defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>; +defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>; + defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>; defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>; defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>; @@ -148,13 +149,13 @@ defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>; /// Load-linked, Store-conditional let Predicates = 
[NotN64, HasStdEnc] in { - def LLD : LLBase<"lld", CPU64Regs, mem>, LW_FM<0x34>; - def SCD : SCBase<"scd", CPU64Regs, mem>, LW_FM<0x3c>; + def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>; + def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>; } let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in { - def LLD_P8 : LLBase<"lld", CPU64Regs, mem64>, LW_FM<0x34>; - def SCD_P8 : SCBase<"scd", CPU64Regs, mem64>, LW_FM<0x3c>; + def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>; + def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>; } /// Jump and Branch Instructions @@ -172,11 +173,11 @@ def TAILCALL64_R : JumpFR, MTLO_FM<8>, IsTailCall; let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. -def DMULT : Mult<"dmult", IIImul, CPU64Regs, [HI64, LO64]>, MULT_FM<0, 0x1c>; -def DMULTu : Mult<"dmultu", IIImul, CPU64Regs, [HI64, LO64]>, MULT_FM<0, 0x1d>; -def DSDIV : Div, +def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; +def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; +def DSDIV : Div, MULT_FM<0, 0x1e>; -def DUDIV : Div, +def DUDIV : Div, MULT_FM<0, 0x1f>; def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>; @@ -189,28 +190,28 @@ def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>; def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>; /// Count Leading -def DCLZ : CountLeading0<"dclz", CPU64Regs>, CLO_FM<0x24>; -def DCLO : CountLeading1<"dclo", CPU64Regs>, CLO_FM<0x25>; +def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>; +def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>; /// Double Word Swap Bytes/HalfWords -def DSBH : SubwordSwap<"dsbh", CPU64Regs>, SEB_FM<2, 0x24>; -def DSHD : SubwordSwap<"dshd", CPU64Regs>, SEB_FM<5, 0x24>; +def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>; +def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>; def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>; } let DecoderNamespace = "Mips64" in { -def RDHWR64 : ReadHardware, RDHWR_FM; +def RDHWR64 : ReadHardware, RDHWR_FM; -def DEXT : ExtBase<"dext", CPU64Regs>, EXT_FM<3>; +def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>; let Pattern = [] in { - def DEXTU : ExtBase<"dextu", CPU64Regs>, EXT_FM<2>; - def DEXTM : ExtBase<"dextm", CPU64Regs>, EXT_FM<1>; + def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>; + def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>; } -def DINS : InsBase<"dins", CPU64Regs>, EXT_FM<7>; +def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>; let Pattern = [] in { - def DINSU : InsBase<"dinsu", CPU64Regs>, EXT_FM<6>; - def DINSM : InsBase<"dinsm", CPU64Regs>, EXT_FM<5>; + def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>; + def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>; } let isCodeGenOnly = 1, rs = 0, shamt = 0 in { @@ -304,7 +305,25 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>; +def : InstAlias<"move $dst,$src", (DADDu CPU64RegsOpnd:$dst, + CPU64RegsOpnd:$src,ZERO_64)>, + Requires<[HasMips64]>; +def : InstAlias<"and $rs, $rt, $imm", + (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, + Requires<[HasMips64]>; +def : InstAlias<"slt $rs, 
$rt, $imm", + (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm)>, + Requires<[HasMips64]>; +def : InstAlias<"xor $rs, $rt, $imm", + (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, + Requires<[HasMips64]>; +def : InstAlias<"not $rt, $rs", (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64)>, + Requires<[HasMips64]>; +def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs)>, Requires<[HasMips64]>; +def : InstAlias<"daddu $rs, $rt, $imm", + (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; +def : InstAlias<"dadd $rs, $rt, $imm", + (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64" in { diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index ab6f8ab..a38ed16 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -138,6 +138,16 @@ class MTC1_FT; +class MFC1_FT_CCR : + InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; + +class MTC1_FT_CCR : + InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; + class LW_FT : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), @@ -265,8 +275,8 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // regardless of register aliasing. /// Move Control Registers From/To CPU Registers -def CFC1 : MFC1_FT<"cfc1", CPURegs, CCR, IIFmove>, MFC1_FM<2>; -def CTC1 : MTC1_FT<"ctc1", CCR, CPURegs, IIFmove>, MFC1_FM<6>; +def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>; +def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>; def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>; def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>; def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>; @@ -437,7 +447,7 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8f2ce6f..3ed8f93 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -332,12 +332,12 @@ def addr : //===----------------------------------------------------------------------===// // Arithmetic and logical instructions with 3 register operands. 
-class ArithLogicR: - InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), + InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], Itin, FrmR> { + [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> { let isCommutable = isComm; let isReMaterializable = 1; string BaseOpcode; @@ -345,27 +345,27 @@ class ArithLogicR : - InstSE<(outs RC:$rt), (ins RC:$rs, Od:$imm16), + InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu, FrmI> { + [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> { let isReMaterializable = 1; } // Arithmetic Multiply ADD/SUB class MArithR : - InstSE<(outs), (ins CPURegs:$rs, CPURegs:$rt), + InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt), !strconcat(opstr, "\t$rs, $rt"), - [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul, FrmR> { + [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> { let Defs = [HI, LO]; let Uses = [HI, LO]; let isCommutable = isComm; } // Logical -class LogicNOR: +class LogicNOR: InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> { @@ -374,17 +374,17 @@ class LogicNOR: // Shifts class shift_rotate_imm : InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt), !strconcat(opstr, "\t$rd, $rt, $shamt"), [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>; -class shift_rotate_reg: - InstSE<(outs RC:$rd), (ins CPURegs:$rs, RC:$rt), + InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rt, $rs"), - [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu, FrmR>; + [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>; // Load Upper Imediate class LoadUpper: @@ -498,15 +498,15 @@ class CBranchZero : // SetCC class SetCC_R : - InstSE<(outs CPURegs:$rd), (ins RC:$rs, RC:$rt), + InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; + [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; class SetCC_I: - InstSE<(outs CPURegs:$rt), (ins RC:$rs, Od:$imm16), + InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>; + [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>; // Jump class JumpFJ; - class BGEZAL_FT : - InstSE<(outs), (ins RC:$rs, brtarget:$offset), + class BGEZAL_FT : + InstSE<(outs), (ins RO:$rs, brtarget:$offset), !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>; } @@ -585,19 +585,19 @@ class SYNC_FT : NoItinerary, FrmOther>; // Mul, Div -class Mult DefRegs> : - InstSE<(outs), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rs, $rt"), [], + InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [], itin, FrmR> { let isCommutable = 1; let Defs = DefRegs; let neverHasSideEffects = 1; } -class Div DefRegs> : - InstSE<(outs), (ins RC:$rs, RC:$rt), - !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RC:$rs, RC:$rt)], itin, + InstSE<(outs), (ins RO:$rs, RO:$rt), + !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin, FrmR> { let Defs = DefRegs; } @@ -623,14 +623,14 @@ class EffectiveAddress : } // Count Leading Ones/Zeros in Word -class CountLeading0: - InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RC:$rd, (ctlz RC:$rs))], IIAlu, FrmR>, +class CountLeading0: + 
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>, Requires<[HasBitCount, HasStdEnc]>; -class CountLeading1: - InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu, FrmR>, +class CountLeading1: + InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>, Requires<[HasBitCount, HasStdEnc]>; @@ -642,31 +642,31 @@ class SignExtInReg : } // Subword Swap -class SubwordSwap: - InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"), [], +class SubwordSwap: + InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], NoItinerary, FrmR> { let Predicates = [HasSwap, HasStdEnc]; let neverHasSideEffects = 1; } // Read Hardware -class ReadHardware : - InstSE<(outs CPURegClass:$rt), (ins HWRegClass:$rd), "rdhwr\t$rt, $rd", [], +class ReadHardware : + InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [], IIAlu, FrmR>; // Ext and Ins -class ExtBase: - InstSE<(outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$size), +class ExtBase: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$size))], NoItinerary, + [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; } -class InsBase: - InstSE<(outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ins:$size, RC:$src), +class InsBase: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$size, RC:$src))], + [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))], NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; let Constraints = "$src = $rt"; @@ -699,15 +699,15 @@ multiclass AtomicCmpSwap32 { } } -class LLBase : - InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), +class LLBase : + InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let mayLoad = 1; } -class SCBase : - InstSE<(outs RC:$dst), (ins RC:$rt, Mem:$addr), +class SCBase : + InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let mayStore = 1; @@ -769,42 +769,42 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<"addiu", simm16, CPURegs, immSExt16, add>, +def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, ADDI_FM<0x9>, IsAsCheapAsAMove; -def ADDi : ArithLogicI<"addi", simm16, CPURegs>, ADDI_FM<0x8>; +def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>; def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>; -def ANDi : ArithLogicI<"andi", uimm16, CPURegs, immZExt16, and>, ADDI_FM<0xc>; -def ORi : ArithLogicI<"ori", uimm16, CPURegs, immZExt16, or>, ADDI_FM<0xd>; -def XORi : ArithLogicI<"xori", uimm16, CPURegs, immZExt16, xor>, ADDI_FM<0xe>; +def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, ADDI_FM<0xc>; +def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, ADDI_FM<0xd>; +def XORi : ArithLogicI<"xori", 
uimm16, CPURegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithLogicR<"addu", CPURegs, 1, IIAlu, add>, ADD_FM<0, 0x21>; -def SUBu : ArithLogicR<"subu", CPURegs, 0, IIAlu, sub>, ADD_FM<0, 0x23>; -def MUL : ArithLogicR<"mul", CPURegs, 1, IIImul, mul>, ADD_FM<0x1c, 2>; -def ADD : ArithLogicR<"add", CPURegs>, ADD_FM<0, 0x20>; -def SUB : ArithLogicR<"sub", CPURegs>, ADD_FM<0, 0x22>; +def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>; +def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>; +def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>; +def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; +def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; -def AND : ArithLogicR<"and", CPURegs, 1, IIAlu, and>, ADD_FM<0, 0x24>; -def OR : ArithLogicR<"or", CPURegs, 1, IIAlu, or>, ADD_FM<0, 0x25>; -def XOR : ArithLogicR<"xor", CPURegs, 1, IIAlu, xor>, ADD_FM<0, 0x26>; -def NOR : LogicNOR<"nor", CPURegs>, ADD_FM<0, 0x27>; +def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; +def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; +def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; +def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : shift_rotate_imm<"sll", shamt, CPURegs, shl, immZExt5>, SRA_FM<0, 0>; -def SRL : shift_rotate_imm<"srl", shamt, CPURegs, srl, immZExt5>, SRA_FM<2, 0>; -def SRA : shift_rotate_imm<"sra", shamt, CPURegs, sra, immZExt5>, SRA_FM<3, 0>; -def SLLV : shift_rotate_reg<"sllv", CPURegs, shl>, SRLV_FM<4, 0>; -def SRLV : shift_rotate_reg<"srlv", CPURegs, srl>, SRLV_FM<6, 0>; -def SRAV : shift_rotate_reg<"srav", CPURegs, sra>, SRLV_FM<7, 0>; +def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, SRA_FM<0, 0>; +def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, SRA_FM<2, 0>; +def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, SRA_FM<3, 0>; +def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; +def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; +def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; // Rotate Instructions let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : shift_rotate_imm<"rotr", shamt, CPURegs, rotr, immZExt5>, + def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>, SRA_FM<2, 1>; - def ROTRV : shift_rotate_reg<"rotrv", CPURegs, rotr>, SRLV_FM<6, 1>; + def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>; } /// Load and Store Instructions @@ -828,13 +828,13 @@ def SYNC : SYNC_FT, SYNC_FM; /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { - def LL : LLBase<"ll", CPURegs, mem>, LW_FM<0x30>; - def SC : SCBase<"sc", CPURegs, mem>, LW_FM<0x38>; + def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; + def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>; } let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LL_P8 : LLBase<"ll", CPURegs, mem64>, LW_FM<0x30>; - def SC_P8 : SCBase<"sc", CPURegs, mem64>, LW_FM<0x38>; + def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>; + def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>; } /// Jump and Branch Instructions @@ 
-853,18 +853,18 @@ def BAL_BR: BAL_FT, BAL_FM; def JAL : JumpLink<"jal">, FJ<3>; def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM; -def BGEZAL : BGEZAL_FT<"bgezal", CPURegs>, BGEZAL_FM<0x11>; -def BLTZAL : BGEZAL_FT<"bltzal", CPURegs>, BGEZAL_FM<0x10>; +def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>; +def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>; def TAILCALL : JumpFJ, FJ<2>, IsTailCall; def TAILCALL_R : JumpFR, MTLO_FM<8>, IsTailCall; def RET : RetBase, MTLO_FM<8>; /// Multiply and Divide Instructions. -def MULT : Mult<"mult", IIImul, CPURegs, [HI, LO]>, MULT_FM<0, 0x18>; -def MULTu : Mult<"multu", IIImul, CPURegs, [HI, LO]>, MULT_FM<0, 0x19>; -def SDIV : Div, MULT_FM<0, 0x1a>; -def UDIV : Div, +def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; +def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; +def SDIV : Div, MULT_FM<0, 0x1a>; +def UDIV : Div, MULT_FM<0, 0x1b>; def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>; @@ -877,11 +877,11 @@ def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>; def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>; /// Count Leading -def CLZ : CountLeading0<"clz", CPURegs>, CLO_FM<0x20>; -def CLO : CountLeading1<"clo", CPURegs>, CLO_FM<0x21>; +def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>; +def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>; /// Word Swap Bytes Within Halfwords -def WSBH : SubwordSwap<"wsbh", CPURegs>, SEB_FM<2, 0x20>; +def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>; /// No operation. /// FIXME: NOP should be an alias of "sll $0, $0, 0". @@ -899,10 +899,10 @@ def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>; def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>; def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>; -def RDHWR : ReadHardware, RDHWR_FM; +def RDHWR : ReadHardware, RDHWR_FM; -def EXT : ExtBase<"ext", CPURegs>, EXT_FM<0>; -def INS : InsBase<"ins", CPURegs>, EXT_FM<4>; +def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>; +def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers def MFC0_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), @@ -920,22 +920,25 @@ def MTC2_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>; -def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>; -def : InstAlias<"addu $rs,$rt,$imm", - (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"add $rs,$rt,$imm", - (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"and $rs,$rt,$imm", - (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"j $rs", (JR CPURegs:$rs)>; -def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>; -def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>; -def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>; -def : InstAlias<"slt $rs,$rt,$imm", - (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"xor $rs,$rt,$imm", - (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"move $dst,$src", (ADDu CPURegsOpnd:$dst, + CPURegsOpnd:$src,ZERO)>, Requires<[NotMips64]>; +def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset)>; +def : InstAlias<"addu $rs, $rt, $imm", + 
(ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; +def : InstAlias<"add $rs, $rt, $imm", + (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; +def : InstAlias<"and $rs, $rt, $imm", + (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; +def : InstAlias<"j $rs", (JR CPURegs:$rs)>, Requires<[NotMips64]>; +def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO)>; +def : InstAlias<"neg $rt, $rs", (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs)>; +def : InstAlias<"negu $rt, $rs", (SUBu CPURegsOpnd:$rt, ZERO, + CPURegsOpnd:$rs)>; +def : InstAlias<"slt $rs, $rt, $imm", + (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm)>; +def : InstAlias<"xor $rs, $rt, $imm", + (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>, + Requires<[NotMips64]>; def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>; def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>; def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>; @@ -945,20 +948,20 @@ def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>; // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), +class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>; +def LoadImm32Reg : LoadImm32<"li", shamt,CPURegsOpnd>; -class LoadAddress : - MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr), +class LoadAddress : + MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr), !strconcat(instr_asm, "\t$rt, $addr")> ; -def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>; +def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>; -class LoadAddressImm : - MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), +class LoadAddressImm : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>; +def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>; @@ -1045,7 +1048,7 @@ def : WrapperPat; // Mips does not have "not", so we expand our way def : MipsPat<(not CPURegs:$in), - (NOR CPURegs:$in, ZERO)>; + (NOR CPURegsOpnd:$in, ZERO)>; // extended loads let Predicates = [NotN64, HasStdEnc] in { diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f07a10c..c6eb0e1 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -331,3 +331,48 @@ def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; // Accumulator Registers def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; + +def CPURegsAsmOperand : AsmOperandClass { + let Name = "CPURegsAsm"; + let ParserMethod = "parseCPURegs"; +} + +def CPU64RegsAsmOperand : AsmOperandClass { + let Name = "CPU64RegsAsm"; + let ParserMethod = "parseCPU64Regs"; +} + +def CCRAsmOperand : AsmOperandClass { + let Name = "CCRAsm"; + let ParserMethod = "parseCCRRegs"; +} + +def CPURegsOpnd : RegisterOperand { + let ParserMatchClass = CPURegsAsmOperand; +} + +def CPU64RegsOpnd : RegisterOperand { + let ParserMatchClass = CPU64RegsAsmOperand; +} + +def CCROpnd : RegisterOperand { + let ParserMatchClass = CCRAsmOperand; +} + +def HWRegsAsmOperand : AsmOperandClass { + let Name = "HWRegsAsm"; + let ParserMethod = "parseHWRegs"; +} + +def 
HW64RegsAsmOperand : AsmOperandClass { + let Name = "HW64RegsAsm"; + let ParserMethod = "parseHW64Regs"; +} + +def HWRegsOpnd : RegisterOperand { + let ParserMatchClass = HWRegsAsmOperand; +} + +def HW64RegsOpnd : RegisterOperand { + let ParserMatchClass = HW64RegsAsmOperand; +} -- cgit v1.1 From e8c161a92451ad38919525ea73ae3c6936c24bdf Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sat, 12 Jan 2013 01:25:15 +0000 Subject: Fixed a bug where we were tail calling objc_autorelease, causing an object to not be placed into an autorelease pool. The reason that this occurs is that tail calling objc_autorelease eventually tail calls -[NSObject autorelease] which supports fast autorelease. This can cause us to violate the semantic guarantee of __autoreleasing variables that assignment to an __autoreleasing variable always yields an object that is placed into the innermost autorelease pool. The fix included in this patch works by: 1. In the peephole optimization function OptimizeIndividualCalls, always removing the tail call marker from objc_autorelease. 2. Whenever we convert to/from an objc_autorelease, setting/unsetting the tail call keyword as appropriate. *NOTE* I also handled the case where objc_autorelease is converted in OptimizeReturns to an autoreleaseRV, which still violates the ARC semantics. I will be removing that in a later patch, and I wanted to make sure that the tree is in a consistent state vis-a-vis ARC always. Additionally, some test cases are provided, and all tests that had objc_autorelease calls marked with the tail keyword have been modified so that the tail keyword is removed. *NOTE* One test fails due to a separate bug that I am going to commit soon. Thus I marked the check line TMP: instead of CHECK: so make check does not fail. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172287 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 34700eb..1607e8e 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -426,10 +426,20 @@ static bool IsAlwaysTail(InstructionClass Class) { // IC_RetainBlock may be given a stack argument. return Class == IC_Retain || Class == IC_RetainRV || - Class == IC_Autorelease || Class == IC_AutoreleaseRV; } +/// \brief Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. +static bool IsNeverTail(InstructionClass Class) { + /// It is never safe to tail call objc_autorelease since by tail calling + /// objc_autorelease, we also tail call -[NSObject autorelease] which supports + /// fast autoreleasing causing our object to be potentially reclaimed from the + /// autorelease pool which violates the semantics of __autoreleasing types in + /// ARC. + return Class == IC_Autorelease; +} + /// IsNoThrow - Test if the given class represents instructions which are always /// safe to mark with the nounwind attribute. static bool IsNoThrow(InstructionClass Class) { @@ -2306,8 +2316,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { " Old: " << *AutoreleaseRV << "\n"); - cast(AutoreleaseRV)-> + CallInst *AutoreleaseRVCI = cast(AutoreleaseRV); + AutoreleaseRVCI-> setCalledFunction(getAutoreleaseCallee(F.getParent())); + AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
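// Illustration, not part of the patch: assuming IR along the lines of
//   %v = tail call i8* @objc_autorelease(i8* %obj)
// clearing the tail marker leaves
//   %v = call i8* @objc_autorelease(i8* %obj)
// so neither objc_autorelease nor the -[NSObject autorelease] it forwards to
// can be tail-called, and %obj is registered with the innermost autorelease
// pool as __autoreleasing requires.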
DEBUG(dbgs() << " New: " << *AutoreleaseRV << "\n"); @@ -2449,6 +2461,16 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { cast(Inst)->setTailCall(); } + // Ensure that functions that can never have a "tail" keyword due to the + // semantics of ARC truly do not do so. + if (IsNeverTail(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail keyword" + " from function: " << *Inst << + "\n"); + cast(Inst)->setTailCall(false); + } + // Set nounwind as needed. if (IsNoThrow(Class)) { Changed = true; @@ -3756,6 +3778,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); DEBUG(dbgs() << " Out: " << *Autorelease << "\n"); + Autorelease->setTailCall(); // Always tail call autoreleaseRV. AutoreleaseClass = IC_AutoreleaseRV; } -- cgit v1.1 From 0e385450fc62a69cf7c557173f2f5df132702379 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sat, 12 Jan 2013 01:25:19 +0000 Subject: Fixed bug in ObjCARC where we were changing a call from objc_autoreleaseRV => objc_autorelease but were not updating the InstructionClass to IC_Autorelease. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172288 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 1607e8e..a982c35 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1716,7 +1716,8 @@ namespace { void OptimizeRetainCall(Function &F, Instruction *Retain); bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); - void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV); + void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, + InstructionClass &Class); void OptimizeIndividualCalls(Function &F); void CheckForCFGHazards(const BasicBlock *BB, @@ -2289,7 +2290,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { /// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into /// objc_autorelease if the result is not used as a return value. void -ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { +ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, + InstructionClass &Class) { // Check for a return of the pointer value. const Value *Ptr = GetObjCArg(AutoreleaseRV); SmallVector Users; @@ -2320,6 +2322,7 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { AutoreleaseRVCI-> setCalledFunction(getAutoreleaseCallee(F.getParent())); AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. + Class = IC_Autorelease; DEBUG(dbgs() << " New: " << *AutoreleaseRV << "\n"); @@ -2417,7 +2420,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { continue; break; case IC_AutoreleaseRV: - OptimizeAutoreleaseRVCall(F, Inst); + OptimizeAutoreleaseRVCall(F, Inst, Class); break; } -- cgit v1.1 From 0d3582b1d1bedde39f964420edd237583bc5a010 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sat, 12 Jan 2013 02:57:16 +0000 Subject: Fixed a few debug messages in ObjCARC and added one. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172298 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index a982c35..5a2db6b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -2602,10 +2602,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } } } while (!Worklist.empty()); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished Queue.\n\n"); - } + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); } /// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible @@ -3869,6 +3867,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) { Changed = false; + DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n"); + PA.setAA(&getAnalysis()); // This pass performs several distinct transformations. As a compile-time aid @@ -3902,6 +3902,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) { (1 << IC_AutoreleaseRV))) OptimizeReturns(F); + DEBUG(dbgs() << "\n"); + return Changed; } -- cgit v1.1 From e7a715f61e7855a280734063989a1001320e85de Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sat, 12 Jan 2013 03:45:49 +0000 Subject: Fixed debug message in ObjCARC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172299 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 5a2db6b..1aa41fc 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -2212,8 +2212,8 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { ++NumPeeps; DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming " - "objc_retainAutoreleasedReturnValue => " - "objc_retain since the operand is not a return value.\n" + "objc_retain => objc_retainAutoreleasedReturnValue" + " since the operand is a return value.\n" " Old: " << *Retain << "\n"); -- cgit v1.1 From a96a96cefaa3196bde76a7bda8e57c95893f723b Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 12 Jan 2013 15:19:10 +0000 Subject: MipsAsmParser: Try to unbreak tests by adding an extra check.
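The fix below guards a union read with a check of the operand's discriminant before inspecting the register kind. A minimal standalone sketch of the pattern, with hypothetical names rather than the parser's actual types:

#include <cassert>

struct Operand {
  enum KindTy { k_Immediate, k_Register } Kind;
  union {
    struct { unsigned RegNum; unsigned RegKind; } Reg;
    long Imm;
  };
  bool isCPUReg(unsigned CPURegKind) const {
    // Check the discriminant before touching Reg; for an immediate
    // operand those bytes belong to Imm.
    return Kind == k_Register && Reg.RegKind == CPURegKind;
  }
};

Without the Kind check, an immediate operand would be matched on whatever bytes happen to alias Reg.RegKind, which is the mismatch the extra check shuts out.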
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172315 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 41df9d4..57338df 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -332,14 +332,14 @@ public: } bool isCPURegsAsm() const { - return Reg.Kind == Kind_CPURegs; + return Kind == k_Register && Reg.Kind == Kind_CPURegs; } void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const { Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); } bool isCPU64RegsAsm() const { - return Reg.Kind == Kind_CPU64Regs; + return Kind == k_Register && Reg.Kind == Kind_CPU64Regs; } void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const { Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); -- cgit v1.1 From 0813589f72de5f15a28f36e2235cb933b7b8ebc0 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 12 Jan 2013 15:34:31 +0000 Subject: GlobalOpt: Avoid jump on uninitialized value. Found by valgrind. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172318 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index de63354..efec788 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2828,7 +2828,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; } - Constant *RetVal; + Constant *RetVal = 0; // Execute the call, if successful, use the return value. ValueStack.push_back(new DenseMap); if (!EvaluateFunction(Callee, RetVal, Formals)) { -- cgit v1.1 From edaf85606d7ac8368dd7fa0e9fd4042e523a6e3a Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 12 Jan 2013 15:37:00 +0000 Subject: MipsDisassembler.cpp: Prune DecodeHWRegs64RegisterClass() to suppress a warning. 
[-Wunused-function] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 9560f3f..1efeffd 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -128,11 +128,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -459,17 +454,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - //Currently only hardware register 29 is supported - if (RegNo != 29) - return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, -- cgit v1.1 From 4dc478308f0de13d9ce20915193ac8c3318c5bd6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 12 Jan 2013 19:06:44 +0000 Subject: When lowering an inreg sext first shift left, then right arithmetically. Shifting right two times will only yield zero. Should fix SingleSource/UnitTests/SignlessTypes/factor. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172322 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2dade85..3989295 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -508,9 +508,9 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { EVT VT = Op.getValueType(); - // Make sure that the SRA and SRL instructions are available. + // Make sure that the SRA and SHL instructions are available. if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); DebugLoc DL = Op.getDebugLoc(); @@ -521,7 +521,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); Op = Op.getOperand(0); - Op = DAG.getNode(ISD::SRL, DL, VT, Op, ShiftSz); + Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } -- cgit v1.1 From eab0ba03cfa8056ee19fe37d2b9fb7793c570564 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 12 Jan 2013 23:46:04 +0000 Subject: Fix an editor goof in r171738 that Bill spotted. He may even have a test case, but looking at the diff this was an obviously unintended change. Thanks for the careful review Bill! 
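Returning to the sign_extend_inreg lowering fix above: shifting left and then right arithmetically replicates the sign bit of the narrow value, while the old SRL+SRA sequence shifted right twice and could only produce zero. A small self-contained sketch of the arithmetic (plain C++, not the SelectionDAG code; right-shifting a negative int is assumed to be arithmetic, as on common targets):

#include <cassert>
#include <cstdint>

// Sign-extend the low OrigBW bits of V within a 32-bit lane.
int32_t signExtendInReg(uint32_t V, unsigned OrigBW) {
  unsigned Sh = 32 - OrigBW;        // ShiftSz in the patch
  return (int32_t)(V << Sh) >> Sh;  // SHL then SRA
}

int main() {
  assert(signExtendInReg(0x7, 4) == 7);   // sign bit clear: unchanged
  assert(signExtendInReg(0xF, 4) == -1);  // sign bit set: replicated
  // (V >> Sh) >> Sh -- the old SRL+SRA -- would yield 0 for these inputs.
  return 0;
}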
=] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index c7b853e..87e3447 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2891,7 +2891,6 @@ void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.InitialMatch(S, L, SE); - F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Initial formula already exists!"); (void)Inserted; } @@ -2903,6 +2902,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.BaseRegs.push_back(S); + F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } -- cgit v1.1 From cf14005185778d823631a2367401e65b5e4802d8 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sun, 13 Jan 2013 07:00:51 +0000 Subject: [ObjCARC] More debug messages. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172346 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 1aa41fc..37af1f5 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -894,6 +894,8 @@ bool ObjCARCExpand::runOnFunction(Function &F) { bool Changed = false; + DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n"); + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { Instruction *Inst = &*I; @@ -2630,8 +2632,13 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // If the terminator is an invoke marked with the // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) { + DEBUG(dbgs() << "ObjCARCOpt::CheckForCFGHazards: Found an invoke " + "terminator marked with " + "clang.arc.no_objc_arc_exceptions. Ignoring unwind " + "edge.\n"); --SE; + } for (; SI != SE; ++SI) { Sequence SuccSSeq = S_None; @@ -2684,8 +2691,13 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // If the terminator is an invoke marked with the // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) { + DEBUG(dbgs() << "ObjCARCOpt::CheckForCFGHazards: Found an invoke " + "terminator marked with " + "clang.arc.no_objc_arc_exceptions. Ignoring unwind " + "edge.\n"); --SE; + } for (; SI != SE; ++SI) { Sequence SuccSSeq = S_None; @@ -2752,8 +2764,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Theoretically we could implement removal of nested retain+release // pairs by making PtrState hold a stack of states, but this is // simple and avoids adding overhead for the non-nested case. 
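// For intuition, an illustrative (not from the patch) nested pair:
//   call i8* @objc_retain(i8* %x)
//   call i8* @objc_retain(i8* %x)
//   call void @objc_release(i8* %x)
//   call void @objc_release(i8* %x)
// Walking bottom-up, the second release arrives while %x is already in
// S_Release/S_MovableRelease, which is what sets NestingDetected below.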
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) { + DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested " + "releases (i.e. a release pair)\n"); NestingDetected = true; + } MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); @@ -2916,6 +2931,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (isa(Inst)) continue; + DEBUG(dbgs() << "ObjCARCOpt::VisitBottomUp: Visiting " << *Inst << "\n"); + NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } @@ -3098,6 +3115,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; + + DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n"); + NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } @@ -3136,8 +3156,13 @@ ComputePostOrders(Function &F, // If the terminator is an invoke marked with the // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) { + DEBUG(dbgs() << "ObjCARCOpt::ComputePostOrders: Found an invoke " + "terminator marked with " + "clang.arc.no_objc_arc_exceptions. Ignoring unwind " + "edge.\n"); --SE; + } while (SuccStack.back().second != SE) { BasicBlock *SuccBB = *SuccStack.back().second++; -- cgit v1.1 From 981308cffbfd1f77750452015f6e6f0f053e11d4 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sun, 13 Jan 2013 07:47:32 +0000 Subject: [ObjCARC] Even more debug messages! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172347 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 37af1f5..794d354 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -619,14 +619,23 @@ static bool ModuleHasARC(const Module &M) { /// escape analysis in that a use as an argument to a call is not considered /// an escape. static bool DoesObjCBlockEscape(const Value *BlockPtr) { + + DEBUG(dbgs() << "DoesObjCBlockEscape: Target: " << *BlockPtr << "\n"); + // Walk the def-use chains. SmallVector Worklist; Worklist.push_back(BlockPtr); do { const Value *V = Worklist.pop_back_val(); + + DEBUG(dbgs() << "DoesObjCBlockEscape: Visiting: " << *V << "\n"); + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const User *UUser = *UI; + + DEBUG(dbgs() << "DoesObjCBlockEscape: User: " << *UUser << "\n"); + // Special - Use by a call (callee or argument) is not considered // to be an escape. switch (GetBasicInstructionClass(UUser)) { @@ -634,15 +643,20 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { case IC_InitWeak: case IC_StoreStrong: case IC_Autorelease: - case IC_AutoreleaseRV: + case IC_AutoreleaseRV: { + DEBUG(dbgs() << "DoesObjCBlockEscape: User copies pointer arguments. " + "Block Escapes!\n"); // These special functions make copies of their pointer arguments. return true; + } case IC_User: case IC_None: // Use by an instruction which copies the value is an escape if the // result is an escape.
if (isa(UUser) || isa(UUser) || isa(UUser) || isa(UUser)) { + DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes if " + "result escapes. Adding to list.\n"); Worklist.push_back(UUser); continue; } @@ -659,11 +673,13 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { continue; } // Otherwise, conservatively assume an escape. + DEBUG(dbgs() << "DoesObjCBlockEscape: Assuming block escapes.\n"); return true; } } while (!Worklist.empty()); // No escapes found. + DEBUG(dbgs() << "DoesObjCBlockEscape: Block does not escape.\n"); return false; } -- cgit v1.1 From b6db95f42b7c3b58f980e387d20ddb3e16bffd56 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 13 Jan 2013 07:56:29 +0000 Subject: Fix PR14547. Handle induction variables of small sizes smaller than i32 (i8 and i16). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172348 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 4bb8c43..464ed97 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1033,11 +1033,14 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // We may need to extend the index in case there is a type mismatch. // We know that the count starts at zero and does not overflow. + unsigned IdxTyBW = IdxTy->getScalarSizeInBits(); if (Count->getType() != IdxTy) { // The exit count can be of pointer type. Convert it to the correct // integer type. if (ExitCount->getType()->isPointerTy()) Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc); + else if (IdxTyBW < Count->getType()->getScalarSizeInBits()) + Count = CastInst::CreateTruncOrBitCast(Count, IdxTy, "tr.cnt", Loc); else Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc); } -- cgit v1.1 From 08219ea2b42e140aa03ceb8e166cd52d787c48aa Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 13 Jan 2013 11:37:04 +0000 Subject: X86: Add patterns for X86ISD::VSEXT in registers. Those can occur when something between the sextload and the store is on the same chain and blocks isel. Fixes PR14887. 
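On the loop vectorizer change above (PR14547): when the induction variable type chosen for the vector loop is narrower than the type the trip count was computed in, the count has to be truncated; a zero extend is only valid in the widening direction. A hedged sketch of that selection, assuming the post-reorganization llvm/IR headers of this period and illustrative names rather than the pass's exact code:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Cast a trip count to the induction variable type IdxTy, inserting the
// cast before Loc.
static Value *castTripCount(Value *Count, Type *IdxTy, Instruction *Loc) {
  if (Count->getType() == IdxTy)
    return Count;
  unsigned IdxBW = IdxTy->getScalarSizeInBits();
  unsigned CntBW = Count->getType()->getScalarSizeInBits();
  if (IdxBW < CntBW)  // narrow IV (e.g. i8/i16): truncate the wider count
    return CastInst::CreateTruncOrBitCast(Count, IdxTy, "tr.cnt", Loc);
  return CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);
}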
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172353 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3175324..23073a9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5590,6 +5590,30 @@ defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; let Predicates = [HasAVX2] in { + def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; + def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>; + + def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>; + + def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))), + (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))), (VPMOVSXWDYrm addr:$src)>; def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))), @@ -5628,6 +5652,15 @@ let Predicates = [HasAVX] in { } let Predicates = [UseSSE41] in { + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; + // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5727,6 +5760,15 @@ let Predicates = [HasAVX] in { def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), (VPMOVZXDQrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VPMOVSXWDrm addr:$src)>; -- cgit v1.1 From 96f498bd9f140a98321c478f517877c4767b94fa Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Sun, 13 Jan 2013 16:01:15 +0000 Subject: Remove redundant 'llvm::' qualifications 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172358 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 2 +- lib/MC/ELFObjectWriter.cpp | 2 +- lib/MC/WinCOFFObjectWriter.cpp | 24 ++++++++++++------------ lib/Support/APFloat.cpp | 2 +- lib/Support/DynamicLibrary.cpp | 2 +- lib/Target/R600/AMDGPUSubtarget.h | 2 +- lib/Target/R600/AMDILDevice.h | 2 +- lib/Target/R600/AMDILPeepholeOptimizer.cpp | 8 ++++---- lib/Transforms/IPO/DeadArgumentElimination.cpp | 2 +- 9 files changed, 23 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index efb9b08..23bc444 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1510,7 +1510,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVector &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - llvm::StructType *STy = llvm::dyn_cast(IndexedType); + llvm::StructType *STy = dyn_cast(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index bfe1709..0072446 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -865,7 +865,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, // FIXME: Is this the correct place to do this? // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed? if (NeedsGOT) { - llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_"; + StringRef Name = "_GLOBAL_OFFSET_TABLE_"; MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name); MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym); Data.setExternal(true); diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 01860c5..e1d6538 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -36,7 +36,7 @@ using namespace llvm; namespace { -typedef llvm::SmallString name; +typedef SmallString name; enum AuxiliaryType { ATFunctionDefinition, @@ -58,7 +58,7 @@ class COFFSymbol { public: COFF::symbol Data; - typedef llvm::SmallVector AuxiliarySymbols; + typedef SmallVector AuxiliarySymbols; name Name; int Index; @@ -69,7 +69,7 @@ public: MCSymbolData const *MCData; - COFFSymbol(llvm::StringRef name); + COFFSymbol(StringRef name); size_t size() const; void set_name_offset(uint32_t Offset); @@ -97,13 +97,13 @@ public: COFFSymbol *Symbol; relocations Relocations; - COFFSection(llvm::StringRef name); + COFFSection(StringRef name); static size_t size(); }; // This class holds the COFF string table. 
class StringTable { - typedef llvm::StringMap map; + typedef StringMap map; map Map; void update_length(); @@ -112,7 +112,7 @@ public: StringTable(); size_t size() const; - size_t insert(llvm::StringRef String); + size_t insert(StringRef String); }; class WinCOFFObjectWriter : public MCObjectWriter { @@ -144,7 +144,7 @@ public: COFFSection *createSection(StringRef Name); template - object_t *createCOFFEntity(llvm::StringRef Name, list_t &List); + object_t *createCOFFEntity(StringRef Name, list_t &List); void DefineSection(MCSectionData const &SectionData); void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler); @@ -202,7 +202,7 @@ static inline void write_uint8_le(void *Data, uint8_t const &Value) { //------------------------------------------------------------------------------ // Symbol class implementation -COFFSymbol::COFFSymbol(llvm::StringRef name) +COFFSymbol::COFFSymbol(StringRef name) : Name(name.begin(), name.end()) , Other(NULL) , Section(NULL) @@ -254,7 +254,7 @@ bool COFFSymbol::should_keep() const { //------------------------------------------------------------------------------ // Section class implementation -COFFSection::COFFSection(llvm::StringRef name) +COFFSection::COFFSection(StringRef name) : Name(name) , MCData(NULL) , Symbol(NULL) { @@ -287,7 +287,7 @@ size_t StringTable::size() const { /// Add String to the table iff it is not already there. /// @returns the index into the string table where the string is now located. -size_t StringTable::insert(llvm::StringRef String) { +size_t StringTable::insert(StringRef String) { map::iterator i = Map.find(String); if (i != Map.end()) @@ -341,14 +341,14 @@ COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol * Symbol){ return RetSymbol; } -COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) { +COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) { return createCOFFEntity(Name, Sections); } /// A template used to lookup or create a symbol/section, and initialize it if /// needed. template -object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name, +object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name, list_t &List) { object_t *Object = new object_t(Name); diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 0e3c619..4a7a5d1 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3448,7 +3448,7 @@ void APFloat::toString(SmallVectorImpl &Str, AdjustToPrecision(significand, exp, FormatPrecision); - llvm::SmallVector buffer; + SmallVector buffer; // Fill the buffer. 
unsigned precision = significand.getBitWidth(); diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index d40439a..f14cb45 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -46,7 +46,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, void *symbolValue) { SmartScopedLock lock(getMutex()); if (ExplicitSymbols == 0) - ExplicitSymbols = new llvm::StringMap(); + ExplicitSymbols = new StringMap(); (*ExplicitSymbols)[symbolName] = symbolValue; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index cab7884..1973fc6 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -44,7 +44,7 @@ public: virtual ~AMDGPUSubtarget(); const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS); + virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool isOverride(AMDGPUDeviceInfo::Caps) const; bool is64bit() const; diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h index b9a1560..97df98c 100644 --- a/lib/Target/R600/AMDILDevice.h +++ b/lib/Target/R600/AMDILDevice.h @@ -104,7 +104,7 @@ public: static const unsigned int QuarterWavefrontSize = 16; protected: virtual void setCaps(); - llvm::BitVector mHWBits; + BitVector mHWBits; llvm::BitVector mSWBits; AMDGPUSubtarget *mSTM; uint32_t DeviceFlag; diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp index a3d30af..a5f7ee5 100644 --- a/lib/Target/R600/AMDILPeepholeOptimizer.cpp +++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp @@ -613,7 +613,7 @@ AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) { if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } Function *Func = dyn_cast(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[4] = { width, offset, @@ -777,7 +777,7 @@ AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) { // Lets create the function. 
Function *Func = dyn_cast(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[3] = { ShiftInst->getOperand(0), shiftValConst, @@ -967,7 +967,7 @@ AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { } Function *Func = dyn_cast( CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[3] = { CI->getOperand(0), CI->getOperand(1), @@ -999,7 +999,7 @@ AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { } Function *Func = dyn_cast( CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[2] = { CI->getOperand(0), CI->getOperand(1) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index ff040e7..4757ce8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -351,7 +351,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) if (Fn.use_empty()) return false; - llvm::SmallVector UnusedArgs; + SmallVector UnusedArgs; for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); I != E; ++I) { Argument *Arg = I; -- cgit v1.1 From 29eb2cc00ca0bc4d218c6d034f90becfed999bcb Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Sun, 13 Jan 2013 18:02:57 +0000 Subject: fix compile-time regression report by Joerg Sonnenberger: cache result of Size/OffsetVisitor to speedup analysis of PHI nodes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172363 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/MemoryBuiltins.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index f88affb..1d27a83 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -387,17 +387,19 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); if (isa(V) || isa(V)) { - // If we have already seen this instruction, bail out. - if (!SeenInsts.insert(V)) - return unknown(); - - SizeOffsetType Ret; + // return cached value or insert unknown in cache if size of V was not + // computed yet in order to avoid recursions in PHis + std::pair CacheVal = + CacheMap.insert(std::make_pair(V, unknown())); + if (!CacheVal.second) + return CacheVal.first->second; + + SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast(V)) - Ret = visitGEPOperator(*GEP); + Result = visitGEPOperator(*GEP); else - Ret = visit(cast(*V)); - SeenInsts.erase(V); - return Ret; + Result = visit(cast(*V)); + return CacheMap[V] = Result; } if (Argument *A = dyn_cast(V)) -- cgit v1.1 From 50c023d4960ee492014391b6b84a259e1e02da82 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sun, 13 Jan 2013 19:03:55 +0000 Subject: Fix typo in comment. 
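On the MemoryBuiltins change above: the cache is seeded with an "unknown" placeholder before the visitor recurses, so a PHI whose incoming values lead back to itself terminates instead of looping. A generic sketch of the idiom, with simplified types rather than the pass's SizeOffsetType machinery:

#include <map>
#include <vector>

struct Node {
  long Size;                         // size if this node allocates
  std::vector<const Node *> Inputs;  // PHI-like incoming values
};

struct SizeVisitor {
  std::map<const Node *, long> Cache;  // -1 encodes "unknown / in progress"

  long compute(const Node *N) {
    // Insert the placeholder first: if a cycle re-enters compute() for N,
    // the second arrival sees -1 and stops recursing.
    std::pair<std::map<const Node *, long>::iterator, bool> Ins =
        Cache.insert(std::make_pair(N, -1L));
    if (!Ins.second)
      return Ins.first->second;
    long Result = N->Size;
    for (size_t I = 0, E = N->Inputs.size(); I != E; ++I)
      if (compute(N->Inputs[I]) != Result)
        Result = -1;  // conflicting or unknown inputs stay unknown
    return Cache[N] = Result;
  }
};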
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172364 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 935f9bd..6f13186 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -446,7 +446,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && (N->getOpcode() == X86ISD::CALL || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be foled into TC_RETURN. + // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just -- cgit v1.1 From 6056b85bb5e863f3b174ed21bd70e22a03ed61f7 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sun, 13 Jan 2013 22:12:06 +0000 Subject: Fixed an infinite loop in the block escape analysis in ObjCARC caused by two blocks each being assigned a value via a phi node, causing each to depend on the other. A test case is provided as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 794d354..2e75bb9 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "objc-arc" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -625,6 +626,10 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { // Walk the def-use chains. SmallVector Worklist; Worklist.push_back(BlockPtr); + + // Ensure we do not visit any value twice. + SmallPtrSet VisitedSet; + do { const Value *V = Worklist.pop_back_val(); @@ -655,9 +660,15 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { // result is an escape. if (isa(UUser) || isa(UUser) || isa(UUser) || isa(UUser)) { - DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes if " - "result escapes. Adding to list.\n"); - Worklist.push_back(UUser); + + if (!VisitedSet.count(UUser)) { + DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes if " + "result escapes. Adding to list.\n"); + VisitedSet.insert(V); + Worklist.push_back(UUser); + } else { + DEBUG(dbgs() << "DoesObjCBlockEscape: Already visited node.\n"); + } continue; } // Use by a load is not an escape. -- cgit v1.1 From 81c6121699a66b3e84f7b794b375095a39584701 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 14 Jan 2013 00:35:14 +0000 Subject: Updated the documentation in ObjCARC.cpp to fit the style guide better (i.e. use doxygen). Still some work to do though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 491 ++++++++++++++++++++------------------ 1 file changed, 259 insertions(+), 232 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 2e75bb9..a3f9ad3 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -6,26 +6,26 @@ // License. See LICENSE.TXT for details.
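On the DoesObjCBlockEscape fix above: the essential change is a visited set alongside the worklist, so two phi nodes feeding each other cannot bounce the walk back and forth forever. A generic sketch of the idea, not the pass's exact bookkeeping (which tracks LLVM Values):

#include <set>
#include <vector>

struct UseNode {
  bool EscapingUse;                    // conservatively treated as an escape
  std::vector<const UseNode *> Users;  // def-use successors (may form cycles)
};

static bool doesEscape(const UseNode *Root) {
  std::vector<const UseNode *> Worklist(1, Root);
  std::set<const UseNode *> Visited;
  Visited.insert(Root);
  while (!Worklist.empty()) {
    const UseNode *V = Worklist.back();
    Worklist.pop_back();
    if (V->EscapingUse)
      return true;
    for (size_t I = 0, E = V->Users.size(); I != E; ++I)
      if (Visited.insert(V->Users[I]).second)  // queue each user only once
        Worklist.push_back(V->Users[I]);
  }
  return false;  // no escapes found
}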
// //===----------------------------------------------------------------------===// -// -// This file defines ObjC ARC optimizations. ARC stands for -// Automatic Reference Counting and is a system for managing reference counts -// for objects in Objective C. -// -// The optimizations performed include elimination of redundant, partially -// redundant, and inconsequential reference count operations, elimination of -// redundant weak pointer operations, pattern-matching and replacement of -// low-level operations into higher-level operations, and numerous minor -// simplifications. -// -// This file also defines a simple ARC-aware AliasAnalysis. -// -// WARNING: This file knows about certain library functions. It recognizes them -// by name, and hardwires knowledge of their semantics. -// -// WARNING: This file knows about how certain Objective-C library functions are -// used. Naive LLVM IR transformations which would otherwise be -// behavior-preserving may break these assumptions. -// +/// \file +/// This file defines ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// The optimizations performed include elimination of redundant, partially +/// redundant, and inconsequential reference count operations, elimination of +/// redundant weak pointer operations, pattern-matching and replacement of +/// low-level operations into higher-level operations, and numerous minor +/// simplifications. +/// +/// This file also defines a simple ARC-aware AliasAnalysis. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "objc-arc" @@ -36,25 +36,23 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// A handy option to enable/disable all optimizations in this file. +/// \brief A handy option to enable/disable all optimizations in this file. static cl::opt EnableARCOpts("enable-objc-arc-opts", cl::init(true)); -//===----------------------------------------------------------------------===// -// Misc. Utilities -//===----------------------------------------------------------------------===// +/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. +/// @{ namespace { - /// MapVector - An associative container with fast insertion-order - /// (deterministic) iteration over its elements. Plus the special - /// blot operation. + /// \brief An associative container with fast insertion-order (deterministic) + /// iteration over its elements. Plus the special blot operation. template class MapVector { - /// Map - Map keys to indices in Vector. + /// Map keys to indices in Vector. typedef DenseMap MapTy; MapTy Map; - /// Vector - Keys and values. typedef std::vector > VectorTy; + /// Keys and values. VectorTy Vector; public: @@ -112,10 +110,9 @@ namespace { return Vector.begin() + It->second; } - /// blot - This is similar to erase, but instead of removing the element - /// from the vector, it just zeros out the key in the vector. This leaves - /// iterators intact, but clients must be prepared for zeroed-out keys when - /// iterating. 
+ /// This is similar to erase, but instead of removing the element from the + /// vector, it just zeros out the key in the vector. This leaves iterators + /// intact, but clients must be prepared for zeroed-out keys when iterating. void blot(const KeyT &Key) { typename MapTy::iterator It = Map.find(Key); if (It == Map.end()) return; @@ -130,9 +127,10 @@ namespace { }; } -//===----------------------------------------------------------------------===// -// ARC Utilities. -//===----------------------------------------------------------------------===// +/// @} +/// +/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. +/// @{ #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ValueTracking.h" @@ -142,7 +140,8 @@ namespace { #include "llvm/Transforms/Utils/Local.h" namespace { - /// InstructionClass - A simple classification for instructions. + /// \enum InstructionClass + /// \brief A simple classification for instructions. enum InstructionClass { IC_Retain, ///< objc_retain IC_RetainRV, ///< objc_retainAutoreleasedReturnValue @@ -170,8 +169,7 @@ namespace { }; } -/// IsPotentialUse - Test whether the given value is possible a -/// reference-counted pointer. +/// \brief Test whether the given value is possible a reference-counted pointer. static bool IsPotentialUse(const Value *Op) { // Pointers to static or stack storage are not reference-counted pointers. if (isa(Op) || isa(Op)) @@ -194,8 +192,7 @@ static bool IsPotentialUse(const Value *Op) { return true; } -/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind -/// of construct CS is. +/// \brief Helper for GetInstructionClass. Determines what kind of construct CS is. static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) @@ -205,8 +202,8 @@ static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { return CS.onlyReadsMemory() ? IC_None : IC_Call; } -/// GetFunctionClass - Determine if F is one of the special known Functions. -/// If it isn't, return IC_CallOrUser. +/// \brief Determine if F is one of the special known Functions. If it isn't, +/// return IC_CallOrUser. static InstructionClass GetFunctionClass(const Function *F) { Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); @@ -278,7 +275,7 @@ static InstructionClass GetFunctionClass(const Function *F) { return IC_CallOrUser; } -/// GetInstructionClass - Determine what kind of construct V is. +/// \brief Determine what kind of construct V is. static InstructionClass GetInstructionClass(const Value *V) { if (const Instruction *I = dyn_cast(V)) { // Any instruction other than bitcast and gep with a pointer operand have a @@ -368,9 +365,11 @@ static InstructionClass GetInstructionClass(const Value *V) { return IC_None; } -/// GetBasicInstructionClass - Determine what kind of construct V is. This is -/// similar to GetInstructionClass except that it only detects objc runtine -/// calls. This allows it to be faster. +/// \brief Determine which objc runtime call instruction class V belongs to. +/// +/// This is similar to GetInstructionClass except that it only detects objc +/// runtime calls. This allows it to be faster. +/// static InstructionClass GetBasicInstructionClass(const Value *V) { if (const CallInst *CI = dyn_cast(V)) { if (const Function *F = CI->getCalledFunction()) @@ -383,22 +382,20 @@ static InstructionClass GetBasicInstructionClass(const Value *V) { return isa(V) ? 
IC_CallOrUser : IC_User; } -/// IsRetain - Test if the given class is objc_retain or -/// equivalent. +/// \brief Test if the given class is objc_retain or equivalent. static bool IsRetain(InstructionClass Class) { return Class == IC_Retain || Class == IC_RetainRV; } -/// IsAutorelease - Test if the given class is objc_autorelease or -/// equivalent. +/// \brief Test if the given class is objc_autorelease or equivalent. static bool IsAutorelease(InstructionClass Class) { return Class == IC_Autorelease || Class == IC_AutoreleaseRV; } -/// IsForwarding - Test if the given class represents instructions which return -/// their argument verbatim. +/// \brief Test if the given class represents instructions which return their +/// argument verbatim. static bool IsForwarding(InstructionClass Class) { // objc_retainBlock technically doesn't always return its argument // verbatim, but it doesn't matter for our purposes here. @@ -410,8 +407,8 @@ static bool IsForwarding(InstructionClass Class) { Class == IC_NoopCast; } -/// IsNoopOnNull - Test if the given class represents instructions which do -/// nothing if passed a null pointer. +/// \brief Test if the given class represents instructions which do nothing if +/// passed a null pointer. static bool IsNoopOnNull(InstructionClass Class) { return Class == IC_Retain || Class == IC_RetainRV || @@ -421,8 +418,8 @@ static bool IsNoopOnNull(InstructionClass Class) { Class == IC_RetainBlock; } -/// IsAlwaysTail - Test if the given class represents instructions which are -/// always safe to mark with the "tail" keyword. +/// \brief Test if the given class represents instructions which are always safe to +/// mark with the "tail" keyword. static bool IsAlwaysTail(InstructionClass Class) { // IC_RetainBlock may be given a stack argument. return Class == IC_Retain || @@ -441,8 +438,8 @@ static bool IsNeverTail(InstructionClass Class) { return Class == IC_Autorelease; } -/// IsNoThrow - Test if the given class represents instructions which are always -/// safe to mark with the nounwind attribute.. +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. static bool IsNoThrow(InstructionClass Class) { // objc_retainBlock is not nounwind because it calls user copy constructors // which could theoretically throw. @@ -455,9 +452,12 @@ static bool IsNoThrow(InstructionClass Class) { Class == IC_AutoreleasepoolPop; } -/// EraseInstruction - Erase the given instruction. Many ObjC calls return their -/// argument verbatim, so if it's such a call and the return value has users, -/// replace them with the argument value. +/// \brief Erase the given instruction. +/// +/// Many ObjC calls return their argument verbatim, +/// so if it's such a call and the return value has users, replace them with the +/// argument value. +/// static void EraseInstruction(Instruction *CI) { Value *OldArg = cast(CI)->getArgOperand(0); @@ -476,9 +476,9 @@ static void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } -/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which -/// also knows how to look through objc_retain and objc_autorelease calls, which -/// we know to return their argument verbatim. +/// \brief This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. 
static const Value *GetUnderlyingObjCPtr(const Value *V) { for (;;) { V = GetUnderlyingObject(V); @@ -490,9 +490,9 @@ static const Value *GetUnderlyingObjCPtr(const Value *V) { return V; } -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. +/// \brief This is a wrapper around Value::stripPointerCasts which also knows +/// how to look through objc_retain and objc_autorelease calls, which we know to +/// return their argument verbatim. static const Value *StripPointerCastsAndObjCCalls(const Value *V) { for (;;) { V = V->stripPointerCasts(); @@ -503,9 +503,9 @@ static const Value *StripPointerCastsAndObjCCalls(const Value *V) { return V; } -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. +/// \brief This is a wrapper around Value::stripPointerCasts which also knows +/// how to look through objc_retain and objc_autorelease calls, which we know to +/// return their argument verbatim. static Value *StripPointerCastsAndObjCCalls(Value *V) { for (;;) { V = V->stripPointerCasts(); @@ -516,16 +516,15 @@ static Value *StripPointerCastsAndObjCCalls(Value *V) { return V; } -/// GetObjCArg - Assuming the given instruction is one of the special calls such -/// as objc_retain or objc_release, return the argument value, stripped of no-op +/// \brief Assuming the given instruction is one of the special calls such as +/// objc_retain or objc_release, return the argument value, stripped of no-op /// casts and forwarding calls. static Value *GetObjCArg(Value *Inst) { return StripPointerCastsAndObjCCalls(cast(Inst)->getArgOperand(0)); } -/// IsObjCIdentifiedObject - This is similar to AliasAnalysis' -/// isObjCIdentifiedObject, except that it uses special knowledge of -/// ObjC conventions... +/// \brief This is similar to AliasAnalysis's isObjCIdentifiedObject, except +/// that it uses special knowledge of ObjC conventions. static bool IsObjCIdentifiedObject(const Value *V) { // Assume that call results and arguments have their own "provenance". // Constants (including GlobalVariables) and Allocas are never @@ -558,9 +557,8 @@ static bool IsObjCIdentifiedObject(const Value *V) { return false; } -/// FindSingleUseIdentifiedObject - This is similar to -/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value -/// with multiple uses. +/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon +/// as it finds a value with multiple uses. static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { if (Arg->hasOneUse()) { if (const BitCastInst *BC = dyn_cast(Arg)) @@ -592,8 +590,8 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return 0; } -/// ModuleHasARC - Test if the given module looks interesting to run ARC -/// optimization on. +/// \brief Test if the given module looks interesting to run ARC optimization +/// on. static bool ModuleHasARC(const Module &M) { return M.getNamedValue("objc_retain") || @@ -615,10 +613,12 @@ static bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_unretainedPointer"); } -/// DoesObjCBlockEscape - Test whether the given pointer, which is an -/// Objective C block pointer, does not "escape". 
This differs from regular -/// escape analysis in that a use as an argument to a call is not considered -/// an escape. +/// \brief Test whether the given pointer, which is an Objective C block pointer, does +/// not "escape". +/// +/// This differs from regular escape analysis in that a use as an +/// argument to a call is not considered an escape. +/// static bool DoesObjCBlockEscape(const Value *BlockPtr) { DEBUG(dbgs() << "DoesObjCBlockEscape: Target: " << *BlockPtr << "\n"); @@ -694,17 +694,18 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { return false; } -//===----------------------------------------------------------------------===// -// ARC AliasAnalysis. -//===----------------------------------------------------------------------===// +/// @} +/// +/// \defgroup ARCAA An extension of alias analysis using ObjC specific knowledge. +/// @{ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Pass.h" namespace { - /// ObjCARCAliasAnalysis - This is a simple alias analysis - /// implementation that uses knowledge of ARC constructs to answer queries. + /// \brief This is a simple alias analysis implementation that uses knowledge + /// of ARC constructs to answer queries. /// /// TODO: This class could be generalized to know about other ObjC-specific /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing @@ -722,10 +723,9 @@ namespace { InitializeAliasAnalysis(this); } - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. + /// This method is used when a pass implements an analysis interface through + /// multiple inheritance. If needed, it should override this to adjust the + /// this pointer as needed for the specified pass info. virtual void *getAdjustedAnalysisPointer(const void *PI) { if (PI == &AliasAnalysis::ID) return static_cast(this); @@ -869,21 +869,22 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, return AliasAnalysis::getModRefInfo(CS1, CS2); } -//===----------------------------------------------------------------------===// -// ARC expansion. -//===----------------------------------------------------------------------===// +/// @} +/// +/// \defgroup ARCExpansion Early ARC Optimizations. +/// @{ #include "llvm/Support/InstIterator.h" #include "llvm/Transforms/Scalar.h" namespace { - /// ObjCARCExpand - Early ARC transformations. + /// \brief Early ARC transformations. class ObjCARCExpand : public FunctionPass { virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); - /// Run - A flag indicating whether this optimization pass should run. + /// A flag indicating whether this optimization pass should run. bool Run; public: @@ -956,15 +957,16 @@ bool ObjCARCExpand::runOnFunction(Function &F) { return Changed; } -//===----------------------------------------------------------------------===// -// ARC autorelease pool elimination. -//===----------------------------------------------------------------------===// +/// @} +/// +/// \defgroup ARCAPElim ARC Autorelease Pool Elimination. +/// @{ #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" namespace { - /// ObjCARCAPElim - Autorelease pool elimination. + /// \brief Autorelease pool elimination. 
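  /// \code
  ///   ; Editorial example (not in the original patch): an empty
  ///   ; push/pop pair, with nothing in between that can autorelease,
  ///   ; which this pass deletes.
  ///   %p = call i8* @objc_autoreleasePoolPush()
  ///   call void @objc_autoreleasePoolPop(i8* %p)
  /// \endcode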
class ObjCARCAPElim : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool runOnModule(Module &M); @@ -994,8 +996,8 @@ void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); } -/// MayAutorelease - Interprocedurally determine if calls made by the -/// given call site can possibly produce autoreleases. +/// Interprocedurally determine if calls made by the given call site can +/// possibly produce autoreleases. bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { if (const Function *Callee = CS.getCalledFunction()) { if (Callee->isDeclaration() || Callee->mayBeOverridden()) @@ -1102,9 +1104,10 @@ bool ObjCARCAPElim::runOnModule(Module &M) { return Changed; } -//===----------------------------------------------------------------------===// -// ARC optimization. -//===----------------------------------------------------------------------===// +/// @} +/// +/// \defgroup ARCOpt ARC Optimization. +/// @{ // TODO: On code like this: // @@ -1158,9 +1161,9 @@ STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); namespace { - /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it - /// uses many of the same techniques, except it uses special ObjC-specific - /// reasoning about pointer relationships. + /// \brief This is similar to BasicAliasAnalysis, and it uses many of the same + /// techniques, except it uses special ObjC-specific reasoning about pointer + /// relationships. class ProvenanceAnalysis { AliasAnalysis *AA; @@ -1228,8 +1231,8 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { return false; } -/// isStoredObjCPointer - Test if the value of P, or any value covered by its -/// provenance, is ever stored within the function (not counting callees). +/// Test if the value of P, or any value covered by its provenance, is ever +/// stored within the function (not counting callees). static bool isStoredObjCPointer(const Value *P) { SmallPtrSet Visited; SmallVector Worklist; @@ -1333,8 +1336,10 @@ bool ProvenanceAnalysis::related(const Value *A, const Value *B) { } namespace { - // Sequence - A sequence of states that a pointer may go through in which an - // objc_retain and objc_release are actually needed. + /// \enum Sequence + /// + /// \brief A sequence of states that a pointer may go through in which an + /// objc_retain and objc_release are actually needed. enum Sequence { S_None, S_Retain, ///< objc_retain(x) @@ -1375,11 +1380,11 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { } namespace { - /// RRInfo - Unidirectional information about either a + /// \brief Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain /// reverese sequence. struct RRInfo { - /// KnownSafe - After an objc_retain, the reference count of the referenced + /// After an objc_retain, the reference count of the referenced /// object is known to be positive. Similarly, before an objc_release, the /// reference count of the referenced object is known to be positive. If /// there are retain-release pairs in code regions where the retain count @@ -1393,24 +1398,23 @@ namespace { /// KnownSafe is true when either of these conditions is satisfied. bool KnownSafe; - /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as - /// opposed to objc_retain calls). 
+ /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain + /// calls). bool IsRetainBlock; - /// IsTailCallRelease - True of the objc_release calls are all marked - /// with the "tail" keyword. + /// True of the objc_release calls are all marked with the "tail" keyword. bool IsTailCallRelease; - /// ReleaseMetadata - If the Calls are objc_release calls and they all have - /// a clang.imprecise_release tag, this is the metadata tag. + /// If the Calls are objc_release calls and they all have a + /// clang.imprecise_release tag, this is the metadata tag. MDNode *ReleaseMetadata; - /// Calls - For a top-down sequence, the set of objc_retains or + /// For a top-down sequence, the set of objc_retains or /// objc_retainBlocks. For bottom-up, the set of objc_releases. SmallPtrSet Calls; - /// ReverseInsertPts - The set of optimal insert positions for - /// moving calls in the opposite sequence. + /// The set of optimal insert positions for moving calls in the opposite + /// sequence. SmallPtrSet ReverseInsertPts; RRInfo() : @@ -1432,23 +1436,22 @@ void RRInfo::clear() { } namespace { - /// PtrState - This class summarizes several per-pointer runtime properties - /// which are propogated through the flow graph. + /// \brief This class summarizes several per-pointer runtime properties which + /// are propogated through the flow graph. class PtrState { - /// KnownPositiveRefCount - True if the reference count is known to - /// be incremented. + /// True if the reference count is known to be incremented. bool KnownPositiveRefCount; - /// Partial - True of we've seen an opportunity for partial RR elimination, - /// such as pushing calls into a CFG triangle or into one side of a - /// CFG diamond. + /// True of we've seen an opportunity for partial RR elimination, such as + /// pushing calls into a CFG triangle or into one side of a CFG diamond. bool Partial; - /// Seq - The current position in the sequence. + /// The current position in the sequence. Sequence Seq : 8; public: - /// RRI - Unidirectional information about the current sequence. + /// Unidirectional information about the current sequence. + /// /// TODO: Encapsulate this better. RRInfo RRI; @@ -1529,30 +1532,31 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { } namespace { - /// BBState - Per-BasicBlock state. + /// \brief Per-BasicBlock state. class BBState { - /// TopDownPathCount - The number of unique control paths from the entry - /// which can reach this block. + /// The number of unique control paths from the entry which can reach this + /// block. unsigned TopDownPathCount; - /// BottomUpPathCount - The number of unique control paths to exits - /// from this block. + /// The number of unique control paths to exits from this block. unsigned BottomUpPathCount; - /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. + /// A type for PerPtrTopDown and PerPtrBottomUp. typedef MapVector MapTy; - /// PerPtrTopDown - The top-down traversal uses this to record information - /// known about a pointer at the bottom of each block. + /// The top-down traversal uses this to record information known about a + /// pointer at the bottom of each block. MapTy PerPtrTopDown; - /// PerPtrBottomUp - The bottom-up traversal uses this to record information - /// known about a pointer at the top of each block. + /// The bottom-up traversal uses this to record information known about a + /// pointer at the top of each block. 
MapTy PerPtrBottomUp; - /// Preds, Succs - Effective successors and predecessors of the current - /// block (this ignores ignorable edges and ignored backedges). + /// Effective predecessors of the current block ignoring ignorable edges and + /// ignored backedges. SmallVector Preds; + /// Effective successors of the current block ignoring ignorable edges and + /// ignored backedges. SmallVector Succs; public: @@ -1579,12 +1583,12 @@ namespace { return PerPtrBottomUp.end(); } - /// SetAsEntry - Mark this block as being an entry block, which has one - /// path from the entry by definition. + /// Mark this block as being an entry block, which has one path from the + /// entry by definition. void SetAsEntry() { TopDownPathCount = 1; } - /// SetAsExit - Mark this block as being an exit block, which has one - /// path to an exit by definition. + /// Mark this block as being an exit block, which has one path to an exit by + /// definition. void SetAsExit() { BottomUpPathCount = 1; } PtrState &getPtrTopDownState(const Value *Arg) { @@ -1608,9 +1612,9 @@ namespace { void MergePred(const BBState &Other); void MergeSucc(const BBState &Other); - /// GetAllPathCount - Return the number of possible unique paths from an - /// entry to an exit which pass through this block. This is only valid - /// after both the top-down and bottom-up traversals are complete. + /// Return the number of possible unique paths from an entry to an exit + /// which pass through this block. This is only valid after both the + /// top-down and bottom-up traversals are complete. unsigned GetAllPathCount() const { assert(TopDownPathCount != 0); assert(BottomUpPathCount != 0); @@ -1641,8 +1645,8 @@ void BBState::InitFromSucc(const BBState &Other) { BottomUpPathCount = Other.BottomUpPathCount; } -/// MergePred - The top-down traversal uses this to merge information about -/// predecessors to form the initial state for a new block. +/// The top-down traversal uses this to merge information about predecessors to +/// form the initial state for a new block. void BBState::MergePred(const BBState &Other) { // Other.TopDownPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. @@ -1672,8 +1676,8 @@ void BBState::MergePred(const BBState &Other) { MI->second.Merge(PtrState(), /*TopDown=*/true); } -/// MergeSucc - The bottom-up traversal uses this to merge information about -/// successors to form the initial state for a new block. +/// The bottom-up traversal uses this to merge information about successors to +/// form the initial state for a new block. void BBState::MergeSucc(const BBState &Other) { // Other.BottomUpPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. @@ -1704,34 +1708,43 @@ void BBState::MergeSucc(const BBState &Other) { } namespace { - /// ObjCARCOpt - The main ARC optimization pass. + /// \brief The main ARC optimization pass. class ObjCARCOpt : public FunctionPass { bool Changed; ProvenanceAnalysis PA; - /// Run - A flag indicating whether this optimization pass should run. + /// A flag indicating whether this optimization pass should run. bool Run; - /// RetainRVCallee, etc. - Declarations for ObjC runtime - /// functions, for use in creating calls to them. These are initialized - /// lazily to avoid cluttering up the Module with unused declarations. 
- Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee, - *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee; - - /// UsedInThisFunciton - Flags which determine whether each of the - /// interesting runtine functions is in fact used in the current function. + /// Declarations for ObjC runtime functions, for use in creating calls to + /// them. These are initialized lazily to avoid cluttering up the Module + /// with unused declarations. + + /// Declaration for ObjC runtime function + /// objc_retainAutoreleasedReturnValue. + Constant *RetainRVCallee; + /// Declaration for ObjC runtime function objc_autoreleaseReturnValue. + Constant *AutoreleaseRVCallee; + /// Declaration for ObjC runtime function objc_release. + Constant *ReleaseCallee; + /// Declaration for ObjC runtime function objc_retain. + Constant *RetainCallee; + /// Declaration for ObjC runtime function objc_retainBlock. + Constant *RetainBlockCallee; + /// Declaration for ObjC runtime function objc_autorelease. + Constant *AutoreleaseCallee; + + /// Flags which determine whether each of the interesting runtine functions + /// is in fact used in the current function. unsigned UsedInThisFunction; - /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release - /// metadata. + /// The Metadata Kind for clang.imprecise_release metadata. unsigned ImpreciseReleaseMDKind; - /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape - /// metadata. + /// The Metadata Kind for clang.arc.copy_on_escape metadata. unsigned CopyOnEscapeMDKind; - /// NoObjCARCExceptionsMDKind - The Metadata Kind for - /// clang.arc.no_objc_arc_exceptions metadata. + /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. unsigned NoObjCARCExceptionsMDKind; Constant *getRetainRVCallee(Module *M); @@ -1929,8 +1942,8 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } -/// IsPotentialUse - Test whether the given value is possible a -/// reference-counted pointer, including tests which utilize AliasAnalysis. +/// Test whether the given value is possible a reference-counted pointer, +/// including tests which utilize AliasAnalysis. static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { // First make the rudimentary check. if (!IsPotentialUse(Op)) @@ -1949,9 +1962,8 @@ static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { return true; } -/// CanAlterRefCount - Test whether the given instruction can result in a -/// reference count modification (positive or negative) for the pointer's -/// object. +/// Test whether the given instruction can result in a reference count +/// modification (positive or negative) for the pointer's object. static bool CanAlterRefCount(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, InstructionClass Class) { @@ -1985,8 +1997,8 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr, return true; } -/// CanUse - Test whether the given instruction can "use" the given pointer's -/// object in a way that requires the reference count to be positive. +/// Test whether the given instruction can "use" the given pointer's object in a +/// way that requires the reference count to be positive. 
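/// \code
///   ; Editorial example (not in the original patch; @use_pointer is a
///   ; hypothetical callee): the call below uses %x's object, so its
///   ; reference count must still be positive here.
///   call void @use_pointer(i8* %x)
/// \endcode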
static bool CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, InstructionClass Class) { @@ -2030,8 +2042,8 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, return false; } -/// CanInterruptRV - Test whether the given instruction can autorelease -/// any pointer or cause an autoreleasepool pop. +/// Test whether the given instruction can autorelease any pointer or cause an +/// autoreleasepool pop. static bool CanInterruptRV(InstructionClass Class) { switch (Class) { @@ -2049,8 +2061,11 @@ CanInterruptRV(InstructionClass Class) { } namespace { - /// DependenceKind - There are several kinds of dependence-like concepts in - /// use here. + /// \enum DependenceKind + /// \brief Defines different dependence kinds among various ARC constructs. + /// + /// There are several kinds of dependence-like concepts in use here. + /// enum DependenceKind { NeedsPositiveRetainCount, AutoreleasePoolBoundary, @@ -2061,8 +2076,8 @@ namespace { }; } -/// Depends - Test if there can be dependencies on Inst through Arg. This -/// function only tests dependencies relevant for removing pairs of calls. +/// Test if there can be dependencies on Inst through Arg. This function only +/// tests dependencies relevant for removing pairs of calls. static bool Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, ProvenanceAnalysis &PA) { @@ -2147,8 +2162,9 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, llvm_unreachable("Invalid dependence flavor"); } -/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and -/// find local and non-local dependencies on Arg. +/// Walk up the CFG from StartPos (which is in StartBB) and find local and +/// non-local dependencies on Arg. +/// /// TODO: Cache results? static void FindDependencies(DependenceKind Flavor, @@ -2220,8 +2236,8 @@ static bool isNoopInstruction(const Instruction *I) { cast(I)->hasAllZeroIndices()); } -/// OptimizeRetainCall - Turn objc_retain into -/// objc_retainAutoreleasedReturnValue if the operand is a return value. +/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a +/// return value. void ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { ImmutableCallSite CS(GetObjCArg(Retain)); @@ -2252,9 +2268,9 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { << *Retain << "\n"); } -/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into -/// objc_retain if the operand is not a return value. Or, if it can be paired -/// with an objc_autoreleaseReturnValue, delete the pair and return true. +/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is +/// not a return value. Or, if it can be paired with an +/// objc_autoreleaseReturnValue, delete the pair and return true. bool ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // Check for the argument being from an immediately preceding call or invoke. @@ -2316,8 +2332,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { return false; } -/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into -/// objc_autorelease if the result is not used as a return value. +/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not +/// used as a return value. 
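/// \code
///   ; Editorial example (not in the original patch): %r is never
///   ; returned from the function, so the ReturnValue form is
///   ; unnecessary and can be weakened.
///   %r = call i8* @objc_autoreleaseReturnValue(i8* %x)
///     =>
///   %r = call i8* @objc_autorelease(i8* %x)
/// \endcode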
void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class) { @@ -2358,8 +2374,8 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, } -/// OptimizeIndividualCalls - Visit each call, one at a time, and make -/// simplifications without doing any additional analysis. +/// Visit each call, one at a time, and make simplifications without doing any +/// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Reset all the flags in preparation for recomputing them. UsedInThisFunction = 0; @@ -2635,9 +2651,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); } -/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible -/// control flow, or other CFG structures where moving code across the edge -/// would result in it being executed more. +/// Check for critical edges, loop boundaries, irreducible control flow, or +/// other CFG structures where moving code across the edge would result in it +/// being executed more. void ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, DenseMap &BBStates, @@ -3158,7 +3174,7 @@ ComputePostOrders(Function &F, SmallVectorImpl &ReverseCFGPostOrder, unsigned NoObjCARCExceptionsMDKind, DenseMap &BBStates) { - /// Visited - The visited set, for doing DFS walks. + /// The visited set, for doing DFS walks. SmallPtrSet Visited; // Do DFS, computing the PostOrder. @@ -3244,7 +3260,7 @@ ComputePostOrders(Function &F, } } -// Visit - Visit the function both top-down and bottom-up. +// Visit the function both top-down and bottom-up. bool ObjCARCOpt::Visit(Function &F, DenseMap &BBStates, @@ -3279,7 +3295,7 @@ ObjCARCOpt::Visit(Function &F, return TopDownNestingDetected && BottomUpNestingDetected; } -/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. +/// Move the calls in RetainsToMove and ReleasesToMove. void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, @@ -3355,8 +3371,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, } } -/// PerformCodePlacement - Identify pairings between the retains and releases, -/// and delete and/or move them. +/// Identify pairings between the retains and releases, and delete and/or move +/// them. bool ObjCARCOpt::PerformCodePlacement(DenseMap &BBStates, @@ -3569,7 +3585,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap return AnyPairsCompletelyEliminated; } -/// OptimizeWeakCalls - Weak pointer optimizations. +/// Weak pointer optimizations. void ObjCARCOpt::OptimizeWeakCalls(Function &F) { // First, do memdep-style RLE and S2L optimizations. We can't use memdep // itself because it uses AliasAnalysis and we need to do provenance @@ -3730,8 +3746,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { } -/// OptimizeSequences - Identify program paths which execute sequences of -/// retains and releases which can be eliminated. +/// Identify program paths which execute sequences of retains and releases which +/// can be eliminated. bool ObjCARCOpt::OptimizeSequences(Function &F) { /// Releases, Retains - These are used to store the results of the main flow /// analysis. 
These use Value* as the key instead of Instruction* so that the @@ -3740,7 +3756,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { DenseMap Releases; MapVector Retains; - /// BBStates, This is used during the traversal of the function to track the + /// This is used during the traversal of the function to track the /// states for each identified object at each block. DenseMap BBStates; @@ -3752,7 +3768,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { NestingDetected; } -/// OptimizeReturns - Look for this pattern: +/// Look for this pattern: /// \code /// %call = call i8* @something(...) /// %2 = call i8* @objc_retain(i8* %call) @@ -3963,9 +3979,10 @@ void ObjCARCOpt::releaseMemory() { PA.clear(); } -//===----------------------------------------------------------------------===// -// ARC contraction. -//===----------------------------------------------------------------------===// +/// @} +/// +/// \defgroup ARCContract ARC Contraction. +/// @{ // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. @@ -3977,30 +3994,37 @@ void ObjCARCOpt::releaseMemory() { STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); namespace { - /// ObjCARCContract - Late ARC optimizations. These change the IR in a way - /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late. + /// \brief Late ARC optimizations + /// + /// These change the IR in a way that makes it difficult to be analyzed by + /// ObjCARCOpt, so it's run late. class ObjCARCContract : public FunctionPass { bool Changed; AliasAnalysis *AA; DominatorTree *DT; ProvenanceAnalysis PA; - /// Run - A flag indicating whether this optimization pass should run. + /// A flag indicating whether this optimization pass should run. bool Run; - /// StoreStrongCallee, etc. - Declarations for ObjC runtime - /// functions, for use in creating calls to them. These are initialized - /// lazily to avoid cluttering up the Module with unused declarations. - Constant *StoreStrongCallee, - *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee; + /// Declarations for ObjC runtime functions, for use in creating calls to + /// them. These are initialized lazily to avoid cluttering up the Module + /// with unused declarations. - /// RetainRVMarker - The inline asm string to insert between calls and - /// RetainRV calls to make the optimization work on targets which need it. + /// Declaration for objc_storeStrong(). + Constant *StoreStrongCallee; + /// Declaration for objc_retainAutorelease(). + Constant *RetainAutoreleaseCallee; + /// Declaration for objc_retainAutoreleaseReturnValue(). + Constant *RetainAutoreleaseRVCallee; + + /// The inline asm string to insert between calls and RetainRV calls to make + /// the optimization work on targets which need it. const MDString *RetainRVMarker; - /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If - /// at the end of walking the function we have found no alloca - /// instructions, these calls can be marked "tail". + /// The set of inserted objc_storeStrong calls. If at the end of walking the + /// function we have found no alloca instructions, these calls can be marked + /// "tail". SmallPtrSet StoreStrongCalls; Constant *getStoreStrongCallee(Module *M); @@ -4099,7 +4123,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { return RetainAutoreleaseRVCallee; } -/// ContractAutorelease - Merge an autorelease with a retain into a fused call. +/// Merge an autorelease with a retain into a fused call. 
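/// \code
///   ; Editorial example (not in the original patch): a retain whose
///   ; result is autoreleased with no intervening hazard fuses into a
///   ; single runtime call.
///   %1 = call i8* @objc_retain(i8* %x)
///   %2 = call i8* @objc_autorelease(i8* %1)
///     =>
///   %2 = call i8* @objc_retainAutorelease(i8* %x)
/// \endcode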
bool ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, InstructionClass Class, @@ -4155,10 +4179,10 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, return true; } -/// ContractRelease - Attempt to merge an objc_release with a store, load, and -/// objc_retain to form an objc_storeStrong. This can be a little tricky because -/// the instructions don't always appear in order, and there may be unrelated -/// intervening instructions. +/// Attempt to merge an objc_release with a store, load, and objc_retain to form +/// an objc_storeStrong. This can be a little tricky because the instructions +/// don't always appear in order, and there may be unrelated intervening +/// instructions. void ObjCARCContract::ContractRelease(Instruction *Release, inst_iterator &Iter) { LoadInst *Load = dyn_cast(GetObjCArg(Release)); @@ -4462,3 +4486,6 @@ bool ObjCARCContract::runOnFunction(Function &F) { return Changed; } + +/// @} +/// -- cgit v1.1 From 7899e47f205f32127ec2b6229ca587e996e0a08e Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 14 Jan 2013 01:47:53 +0000 Subject: Fixed some 80+ violations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172374 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index a3f9ad3..e7c5178 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -192,7 +192,8 @@ static bool IsPotentialUse(const Value *Op) { return true; } -/// \brief Helper for GetInstructionClass. Determines what kind of construct CS is. +/// \brief Helper for GetInstructionClass. Determines what kind of construct CS +/// is. static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) @@ -418,8 +419,8 @@ static bool IsNoopOnNull(InstructionClass Class) { Class == IC_RetainBlock; } -/// \brief Test if the given class represents instructions which are always safe to -/// mark with the "tail" keyword. +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. static bool IsAlwaysTail(InstructionClass Class) { // IC_RetainBlock may be given a stack argument. return Class == IC_Retain || @@ -613,8 +614,8 @@ static bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_unretainedPointer"); } -/// \brief Test whether the given pointer, which is an Objective C block pointer, does -/// not "escape". +/// \brief Test whether the given pointer, which is an Objective C block +/// pointer, does not "escape". /// /// This differs from regular escape analysis in that a use as an /// argument to a call is not considered an escape. @@ -662,8 +663,8 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { isa(UUser) || isa(UUser)) { if (!VisitedSet.count(UUser)) { - DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes if " - "result escapes. Adding to list.\n"); + DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes " + "if result escapes. Adding to list.\n"); VisitedSet.insert(V); Worklist.push_back(UUser); } else { @@ -696,7 +697,7 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { /// @} /// -/// \defgroup ARCAA An extension of alias analysis using ObjC specific knowledge. 
+/// \defgroup ARCAA Extends alias analysis using ObjC specific knowledge. /// @{ #include "llvm/Analysis/AliasAnalysis.h" @@ -1036,8 +1037,9 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { // zap the pair. if (Push && cast(Inst)->getArgOperand(0) == Push) { Changed = true; - DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop autorelease pair:\n" - << " Pop: " << *Inst << "\n" + DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop " + "autorelease pair:\n" + " Pop: " << *Inst << "\n" << " Push: " << *Push << "\n"); Inst->eraseFromParent(); Push->eraseFromParent(); @@ -1652,7 +1654,8 @@ void BBState::MergePred(const BBState &Other) { // loop backedge. Loop backedges are special. TopDownPathCount += Other.TopDownPathCount; - // Check for overflow. If we have overflow, fall back to conservative behavior. + // Check for overflow. If we have overflow, fall back to conservative + // behavior. if (TopDownPathCount < Other.TopDownPathCount) { clearTopDownPointers(); return; @@ -1683,7 +1686,8 @@ void BBState::MergeSucc(const BBState &Other) { // loop backedge. Loop backedges are special. BottomUpPathCount += Other.BottomUpPathCount; - // Check for overflow. If we have overflow, fall back to conservative behavior. + // Check for overflow. If we have overflow, fall back to conservative + // behavior. if (BottomUpPathCount < Other.BottomUpPathCount) { clearBottomUpPointers(); return; @@ -2513,8 +2517,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // semantics of ARC truly do not do so. if (IsNeverTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail keyword" - " from function: " << *Inst << + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " + "keyword from function: " << *Inst << "\n"); cast(Inst)->setTailCall(false); } -- cgit v1.1 From 33160cf37637691de97b16ad1b67e251fc6355d4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Jan 2013 07:26:58 +0000 Subject: Create a single multiclass for SSE and AVX version of MOVL/MOVH. Prevents needing to specify everything twice. 
No functional change intended git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172378 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 47 ++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 23073a9..b3d6066 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1110,34 +1110,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// -multiclass sse12_mov_hilo_packedopc, RegisterClass RC, - SDNode psnode, SDNode pdnode, string base_opc, - string asm_opr, InstrItinClass itin> { +multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, + string base_opc, string asm_opr, + InstrItinClass itin> { def PSrm : PI, TB; def PDrm : PI, TB, OpSize; + } -let AddedComplexity = 20 in { - defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; +multiclass sse12_mov_hilo_packedopc, SDNode psnode, SDNode pdnode, + string base_opc, InstrItinClass itin> { + defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V; + +let Constraints = "$src1 = $dst" in + defm NAME : sse12_mov_hilo_packed_base; } -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + +let AddedComplexity = 20 in { + defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp", + IIC_SSE_MOV_LH>; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1235,14 +1242,8 @@ let Predicates = [UseSSE2] in { //===----------------------------------------------------------------------===// let AddedComplexity = 20 in { - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; -} -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp", + IIC_SSE_MOV_LH>; } // v2f64 extract element 1 is always custom lowered to unpack high to low -- cgit v1.1 From 29344a6349af5e37b1187de5d354cb95a5840e13 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Jan 2013 07:46:34 +0000 Subject: Simplify nested strconcats in X86 td files since strconcat can take more than 2 arguments. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172379 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFMA.td | 14 ++++++------ lib/Target/X86/X86InstrSSE.td | 50 +++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index f48f133..7759a8a 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -60,14 +60,14 @@ multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm r213 : fma3p_rm; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm; defm r231 : fma3p_rm; } // neverHasSideEffects = 1 } @@ -160,15 +160,15 @@ multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, X86MemOperand x86memop, Operand memop, PatFrag mem_frag, ComplexPattern mem_cpat> { let neverHasSideEffects = 1 in { - defm r132 : fma3s_rm; - defm r231 : fma3s_rm; } -defm r213 : fma3s_rm, - fma3s_rm_int; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b3d6066..89149c6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3013,18 +3013,18 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; } @@ -3055,18 +3055,18 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; } @@ -3101,22 +3101,22 @@ multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr : PSI, VEX; def V#NAME#PSm : PSI, VEX; def V#NAME#PSYr : PSI, VEX, VEX_L; def V#NAME#PSYm : PSI, VEX, VEX_L; @@ -3136,23 +3136,23 @@ multiclass sse1_fp_unop_p_int opc, string OpcodeStr, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr_Int : PSI, VEX; def V#NAME#PSm_Int : PSI, VEX; def V#NAME#PSYr_Int : PSI, VEX, VEX_L; def V#NAME#PSYm_Int : PSI, VEX, VEX_L; @@ -3174,18 +3174,18 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SDr : SDI, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; } @@ -3212,22 +3212,22 @@ multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PDr : PDI, VEX; def V#NAME#PDm : PDI, VEX; def V#NAME#PDYr : PDI, VEX, VEX_L; def V#NAME#PDYm : PDI, VEX, VEX_L; @@ -3986,14 +3986,14 @@ multiclass sse2_pshuffle, VEX; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), @@ -4003,14 +4003,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], 
IIC_SSE_PSHUF>, VEX, VEX_L; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), -- cgit v1.1 From 800ec3da7af7b73cd3c61d5da53d9d7c8343ad83 Mon Sep 17 00:00:00 2001 From: Timur Iskhodzhanov Date: Mon, 14 Jan 2013 14:13:06 +0000 Subject: Revert r171829 "Split changeset_ty using iterators instead of loops" as it breaks the VS2008 build git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172411 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/DeltaAlgorithm.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp index a1e3311..9e52874 100644 --- a/lib/Support/DeltaAlgorithm.cpp +++ b/lib/Support/DeltaAlgorithm.cpp @@ -27,15 +27,13 @@ bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) { void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) { // FIXME: Allow clients to provide heuristics for improved splitting. - // Get the iterator to the middle. - unsigned N = S.size() / 2; - changeset_ty::iterator middle(S.begin()); - std::advance(middle, N); - - // Create each vector using the middle as the split. - changeset_ty LHS(S.begin(), middle); - changeset_ty RHS(middle, S.end()); + // FIXME: This is really slow. + changeset_ty LHS, RHS; + unsigned idx = 0, N = S.size() / 2; + for (changeset_ty::const_iterator it = S.begin(), + ie = S.end(); it != ie; ++it, ++idx) + ((idx < N) ? LHS : RHS).insert(*it); if (!LHS.empty()) Res.push_back(LHS); if (!RHS.empty()) -- cgit v1.1 From 733c336327621d41617d3d49c7b86199a361a367 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 14 Jan 2013 18:08:41 +0000 Subject: Encapsulate the MacroEnabled flag in AsmParser behind accessor methods. The methods are also exposed via the MCAsmParser interface, which allows more than one client to control them. Previously, GenericAsmParser was playing with a member var in AsmParser directly (by virtue of being its friend). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172440 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 50579a6..3621982 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -151,7 +151,7 @@ private: std::vector ActiveMacros; /// Boolean tracking whether macro substitution is enabled. - unsigned MacrosEnabled : 1; + unsigned MacrosEnabledFlag : 1; /// Flag tracking whether any errors have been encountered. 
unsigned HadError : 1; @@ -231,6 +231,9 @@ public: virtual bool ParseIdentifier(StringRef &Res); virtual void EatToEndOfStatement(); + virtual bool MacrosEnabled() {return MacrosEnabledFlag;} + virtual void SetMacrosEnabled(bool flag) {MacrosEnabledFlag = flag;} + /// } private: @@ -503,7 +506,7 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), GenericParser(new GenericAsmParser), PlatformParser(0), - CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0), + CurBuffer(0), MacrosEnabledFlag(true), CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) { // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); @@ -1278,7 +1281,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { } // If macros are enabled, check to see if this is a macro instantiation. - if (MacrosEnabled) + if (MacrosEnabled()) if (const Macro *M = MacroMap.lookup(IDVal)) return HandleMacroEntry(IDVal, IDLoc, M); @@ -3489,7 +3492,7 @@ bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive, return Error(getLexer().getLoc(), "unexpected token in '" + Directive + "' directive"); - getParser().MacrosEnabled = Directive == ".macros_on"; + getParser().SetMacrosEnabled(Directive == ".macros_on"); return false; } -- cgit v1.1 From 9bac6b29b832419f8b76bb2c27af74bb57a8d99a Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 14 Jan 2013 19:00:26 +0000 Subject: Move ParseMacroArgument to the MCAsmParser interfance. Since it's used by extensions. One further step to fully decoupling GenericAsmParser from an intimate knowledge of the internals of AsmParser, pointing it to the MCASmParser interface instead (like all other parser extensions do). Since this change moves the MacroArgument type to the interface header, it's renamed to be a bit more descriptive in a general context. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172449 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 3621982..752cc19 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -51,9 +51,8 @@ MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {} namespace { /// \brief Helper class for tracking macro definitions. -typedef std::vector MacroArgument; -typedef std::vector MacroArguments; -typedef std::pair MacroParameter; +typedef std::vector MacroArguments; +typedef std::pair MacroParameter; typedef std::vector MacroParameters; struct Macro { @@ -271,7 +270,7 @@ private: /// location. void JumpToLoc(SMLoc Loc, int InBuffer=-1); - bool ParseMacroArgument(MacroArgument &MA, + bool ParseMacroArgument(MCAsmMacroArgument &MA, AsmToken::TokenKind &ArgumentDelimiter); bool ParseMacroArguments(const Macro *M, MacroArguments &A); @@ -1650,7 +1649,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, break; // Otherwise substitute with the token values, with spaces eliminated. 
- for (MacroArgument::const_iterator it = A[Index].begin(), + for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), ie = A[Index].end(); it != ie; ++it) OS << it->getString(); break; @@ -1677,7 +1676,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, Pos = I; } } else { - for (MacroArgument::const_iterator it = A[Index].begin(), + for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), ie = A[Index].end(); it != ie; ++it) if (it->getKind() == AsmToken::String) OS << it->getStringContents(); @@ -1735,10 +1734,7 @@ static bool IsOperator(AsmToken::TokenKind kind) } } -/// ParseMacroArgument - Extract AsmTokens for a macro argument. -/// This is used for both default macro parameter values and the -/// arguments in macro invocations -bool AsmParser::ParseMacroArgument(MacroArgument &MA, +bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA, AsmToken::TokenKind &ArgumentDelimiter) { unsigned ParenLevel = 0; unsigned AddTokens = 0; @@ -1825,7 +1821,7 @@ bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) { // - macros defined with parameters accept at most that many of them for (unsigned Parameter = 0; !NParameters || Parameter < NParameters; ++Parameter) { - MacroArgument MA; + MCAsmMacroArgument MA; if (ParseMacroArgument(MA, ArgumentDelimiter)) return true; @@ -3814,7 +3810,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { StringRef Values = A.front().front().getString(); std::size_t I, End = Values.size(); for (I = 0; I < End; ++I) { - MacroArgument Arg; + MCAsmMacroArgument Arg; Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); MacroArguments Args; -- cgit v1.1 From 030f63a397edc20f8f661bac62f7b90cb5cf57bc Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 14 Jan 2013 19:04:57 +0000 Subject: Expose an InitToTextSection through MCStreamer. The aim of this patch is to fix the following piece of code in the platform-independent AsmParser: void AsmParser::CheckForValidSection() { if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { TokError("expected section directive before assembly directive"); Out.SwitchSection(Ctx.getMachOSection( "__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, SectionKind::getText())); } } This was added for the "-n" option of llvm-mc. The proposed fix adds another virtual method to MCStreamer, called InitToTextSection. Conceptually, it's similar to the existing InitSections which initializes all common sections and switches to text. The new method is implemented by each platform streamer in a way that it sees fit. So AsmParser can now do this: void AsmParser::CheckForValidSection() { if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { TokError("expected section directive before assembly directive"); Out.InitToTextSection(); } } Which is much more reasonable. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172450 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAsmStreamer.cpp | 11 ++++++++--- lib/MC/MCELFStreamer.cpp | 4 ++++ lib/MC/MCMachOStreamer.cpp | 11 ++++++++--- lib/MC/MCNullStreamer.cpp | 3 +++ lib/MC/MCParser/AsmParser.cpp | 5 +---- lib/MC/MCPureStreamer.cpp | 6 +++++- lib/MC/WinCOFFStreamer.cpp | 5 +++++ 7 files changed, 34 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e234dfe..dd5112c 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -127,11 +127,16 @@ public: virtual void ChangeSection(const MCSection *Section); virtual void InitSections() { + InitToTextSection(); + } + + virtual void InitToTextSection() { // FIXME, this is MachO specific, but the testsuite // expects this. - SwitchSection(getContext().getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + SwitchSection(getContext().getMachOSection( + "__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind::getText())); } virtual void EmitLabel(MCSymbol *Symbol); diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index d05fcca..cae73be 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -65,6 +65,10 @@ inline void MCELFStreamer::SetSectionBss() { MCELFStreamer::~MCELFStreamer() { } +void MCELFStreamer::InitToTextSection() { + SetSectionText(); +} + void MCELFStreamer::InitSections() { // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 82ccdd4..f947dda 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -42,6 +42,7 @@ public: /// @{ virtual void InitSections(); + virtual void InitToTextSection(); virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitDebugLabel(MCSymbol *Symbol); virtual void EmitEHSymAttributes(const MCSymbol *Symbol, @@ -90,10 +91,14 @@ public: } // end anonymous namespace. 
void MCMachOStreamer::InitSections() { - SwitchSection(getContext().getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + InitToTextSection(); +} +void MCMachOStreamer::InitToTextSection() { + SwitchSection(getContext().getMachOSection( + "__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, + SectionKind::getText())); } void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 364c324..3eee5ca 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -24,6 +24,9 @@ namespace { /// @name MCStreamer Interface /// @{ + virtual void InitToTextSection() { + } + virtual void InitSections() { } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 752cc19..262cb2c 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -708,10 +708,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { void AsmParser::CheckForValidSection() { if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { TokError("expected section directive before assembly directive"); - Out.SwitchSection(Ctx.getMachOSection( - "__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + Out.InitToTextSection(); } } diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp index 97e5a69..6ce7ae8 100644 --- a/lib/MC/MCPureStreamer.cpp +++ b/lib/MC/MCPureStreamer.cpp @@ -36,6 +36,7 @@ public: /// @{ virtual void InitSections(); + virtual void InitToTextSection(); virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitDebugLabel(MCSymbol *Symbol); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, @@ -104,11 +105,14 @@ public: } // end anonymous namespace. void MCPureStreamer::InitSections() { + InitToTextSection(); +} + +void MCPureStreamer::InitToTextSection() { // FIMXE: To what!? SwitchSection(getContext().getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, SectionKind::getText())); - } void MCPureStreamer::EmitLabel(MCSymbol *Symbol) { diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 5489ef8..cc2c272 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -50,6 +50,7 @@ public: // MCStreamer interface virtual void InitSections(); + virtual void InitToTextSection(); virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitDebugLabel(MCSymbol *Symbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); @@ -173,6 +174,10 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, // MCStreamer interface +void WinCOFFStreamer::InitToTextSection() { + SetSectionText(); +} + void WinCOFFStreamer::InitSections() { SetSectionText(); SetSectionData(); -- cgit v1.1 From 318cad33231f765f9b6b2af4bb43f8c281b99d19 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 14 Jan 2013 19:15:01 +0000 Subject: Move CheckForValidSection to the MCAsmParser interface. Now that it behaves itself in terms of streamer independence (r172450), this method can be moved to MCAsmParser to be available to all extensions, overriding, etc. 
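For illustration only (this snippet is not part of the patch, and the extension and directive names are hypothetical): with CheckForValidSection on the MCAsmParser interface, an extension can now apply the same guard the core parser uses before emitting anything:

  bool MyParserExtension::ParseDirectiveFoo(StringRef, SMLoc) {
    getParser().CheckForValidSection(); // reachable through the interface
    return false;
  }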
-- -This line, and those below, will be ignored-- M lib/MC/MCParser/AsmParser.cpp M include/llvm/MC/MCParser/MCAsmParser.h git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172451 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 262cb2c..45aaa2e 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -233,10 +233,10 @@ public: virtual bool MacrosEnabled() {return MacrosEnabledFlag;} virtual void SetMacrosEnabled(bool flag) {MacrosEnabledFlag = flag;} + virtual void CheckForValidSection(); /// } private: - void CheckForValidSection(); bool ParseStatement(ParseStatementInfo &Info); void EatToEndOfLine(); -- cgit v1.1 From f3c1335c596e2a45a877e39a139b23ea5481eb37 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 14 Jan 2013 19:18:39 +0000 Subject: Changed SmallPtrSet.count guard + SmallPtrSet.insert to just SmallPtrSet.insert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172452 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index e7c5178..abd6b41 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -662,10 +662,9 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { if (isa(UUser) || isa(UUser) || isa(UUser) || isa(UUser)) { - if (!VisitedSet.count(UUser)) { + if (!VisitedSet.insert(UUser)) { DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes " "if result escapes. Adding to list.\n"); - VisitedSet.insert(V); Worklist.push_back(UUser); } else { DEBUG(dbgs() << "DoesObjCBlockEscape: Already visited node.\n"); -- cgit v1.1 From eb3ac4518e46ffaea978d40daf2b4b34b13c48dd Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 14 Jan 2013 20:56:10 +0000 Subject: Fix typo in comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172460 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index c782032..0c0864f 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -739,7 +739,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { } Instruction *InstCombiner::visitZExt(ZExtInst &CI) { - // If this zero extend is only used by a truncate, let the truncate by + // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa(CI.use_back())) return 0; -- cgit v1.1 From 1ba5769676bb14078ddbdb9760523619726800c0 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Mon, 14 Jan 2013 21:00:37 +0000 Subject: SCEVExpander fix. RAUW needs to update the InsertedExpressions cache. Note that this bug is only exposed because LTO fails to use TTI. Fixes self-LTO of clang. rdar://13007381. 
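To see why the cache entry type matters, here is an editorial sketch (not part of the patch; exact headers vary by LLVM version) of the two value-handle behaviors:

  TrackingVH<Value> Cached(Old);  // cache entry, as in InsertedExpressions
  Old->replaceAllUsesWith(New);   // RAUW performed during expansion fixups
  Old->eraseFromParent();
  // Cached now refers to New. An AssertingVH would never follow the RAUW
  // and would assert when Old was destroyed.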
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172462 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolutionExpander.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index b87ad75..fcd7ce2 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1523,9 +1523,8 @@ Value *SCEVExpander::expand(const SCEV *S) { } // Check to see if we already expanded this here. - std::map, - AssertingVH >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + std::map, TrackingVH >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; -- cgit v1.1 From 914d4a76fe0dd7aafb9f06f5af2dcf09c0b87ee7 Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:37 +0000 Subject: Fix Casting Stop a gcc warning about casting away const. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172465 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Use.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp index 481cbab..1d343e8 100644 --- a/lib/IR/Use.cpp +++ b/lib/IR/Use.cpp @@ -139,7 +139,7 @@ User *Use::getUser() const { const UserRef *ref = reinterpret_cast(End); return ref->getInt() ? ref->getPointer() - : (User*)End; + : reinterpret_cast(const_cast(End)); } } // End llvm namespace -- cgit v1.1 From 5a80eefdf7ef9022cd148b9eed16aa3c14b59a56 Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:38 +0000 Subject: Fix More Casts Fix another cast-away-const cast. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172466 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObject.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp index a64db1c..529bdf9 100644 --- a/lib/Object/MachOObject.cpp +++ b/lib/Object/MachOObject.cpp @@ -44,7 +44,8 @@ static void ReadInMemoryStruct(const MachOObject &MOO, } // Check whether we can return a direct pointer. - struct_type *Ptr = (struct_type *) (Buffer.data() + Base); + struct_type *Ptr = reinterpret_cast( + const_cast(Buffer.data() + Base)); if (!MOO.isSwappedEndian()) { Res = Ptr; return; -- cgit v1.1 From 4ee576fac3a84553c9342faea87ff0e13e8eb48d Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:40 +0000 Subject: Fix Casting Bug Add a const version of getFpValPtr to avoid a cast-away-const warning. 
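The shape of the fix, as a generic editorial miniature (not the patch itself): pair the non-const accessor with a const overload so const member functions never need a const_cast:

  struct Buf {
    char Storage[sizeof(double)];
    double *ptr() { return reinterpret_cast<double *>(&Storage[0]); }
    const double *ptr() const {
      return reinterpret_cast<const double *>(&Storage[0]);
    }
  };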
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index f07c58d..03be8ef 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -66,10 +66,12 @@ namespace { bool insaneIntVal(int V) { return V > 4 || V < -4; } APFloat *getFpValPtr(void) { return reinterpret_cast(&FpValBuf.buffer[0]); } + const APFloat *getFpValPtr(void) const + { return reinterpret_cast(&FpValBuf.buffer[0]); } const APFloat &getFpVal(void) const { assert(IsFp && BufHasFpVal && "Incorret state"); - return *reinterpret_cast(&FpValBuf.buffer[0]); + return *getFpValPtr(); } APFloat &getFpVal(void) -- cgit v1.1 From ef44c353599e0e2fd5b2ec2ae5d9bc0e2a355cad Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:42 +0000 Subject: Fix Another Cast Properly cast code to eliminate cast-away-const errors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172468 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 16cec5c..4c27d35 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1017,7 +1017,8 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); HexagonTargetObjectFile &TLOF = - (HexagonTargetObjectFile&)getObjFileLowering(); + static_cast( + const_cast(getObjFileLowering())); if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); } -- cgit v1.1 From fe1215ef935f182cdca28b4af655fa0bfa0f47e6 Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:44 +0000 Subject: Fix More Casts Properly cast some more code that triggered cast-away-const errors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172469 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 353bebf..66aeb77 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -72,15 +72,20 @@ namespace { /// getBlockAfter - Return the memory block immediately after this one. /// MemoryRangeHeader &getBlockAfter() const { - return *(MemoryRangeHeader*)((char*)this+BlockSize); + return *reinterpret_cast( + reinterpret_cast( + const_cast(this))+BlockSize); } /// getFreeBlockBefore - If the block before this one is free, return it, /// otherwise return null. 
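  // Illustrative aside, not part of the patch: blocks here are navigated by
  // size arithmetic alone -- getBlockAfter() above offsets 'this' by
  // BlockSize, and a free block records its size in the word immediately
  // before the next header, which is what ((intptr_t *)this)[-1] reads
  // below. The const_cast chain merely launders 'this' for that arithmetic;
  // no genuinely const object is modified.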
FreeRangeHeader *getFreeBlockBefore() const { if (PrevAllocated) return 0; - intptr_t PrevSize = ((intptr_t *)this)[-1]; - return (FreeRangeHeader*)((char*)this-PrevSize); + intptr_t PrevSize = reinterpret_cast( + const_cast(this))[-1]; + return reinterpret_cast( + reinterpret_cast( + const_cast(this))-PrevSize); } /// FreeBlock - Turn an allocated block into a free block, adjusting -- cgit v1.1 From c2680bef3b4994017d83293bc1337b26be54ac77 Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:45 +0000 Subject: Fix Casting Do proper casting to eliminate a const-away-cast compiler warning. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172470 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/ExecutionEngine.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index ef5f589..6f71ffb 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -893,7 +893,8 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, /// from Src into IntVal, which is assumed to be wide enough and to hold zero. static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); - uint8_t *Dst = (uint8_t *)IntVal.getRawData(); + uint8_t *Dst = reinterpret_cast( + const_cast(IntVal.getRawData())); if (sys::isLittleEndianHost()) // Little-endian host - the destination must be ordered from LSB to MSB. -- cgit v1.1 From b398cae1e501069c48456a4bfdf8bbf549aa9746 Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 14 Jan 2013 21:04:47 +0000 Subject: Fix Casting Fix a casting-away-const compiler warning. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172471 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCodeEmitter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 52fa95b..8efcd3c 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -120,7 +120,8 @@ class MipsCodeEmitter : public MachineFunctionPass { char MipsCodeEmitter::ID = 0; bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo(); + JTI = const_cast( + static_cast(MF.getTarget())).getJITInfo(); II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo(); TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout(); Subtarget = &TM.getSubtarget (); -- cgit v1.1 From 398db9368d72d1d60d40b2e18c16ca2c14aa7f39 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Mon, 14 Jan 2013 21:07:43 +0000 Subject: Complete the existing support of ARM v6m, v7m, and v7em, i.e., respectively cortex-m0, cortex-m3, and cortex-m4 on the backend side. Adds new subtype values for the MachO format and use them when the related triple are set. 
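(An illustrative aside on the casting fixes above, r172465 through r172471, not part of any patch: each one recovers a mutable and/or derived-type reference from a const one. A minimal sketch of the pattern and its caveat, with made-up types:)

  struct TM { virtual ~TM() {} };
  struct MipsLikeTM : TM { int *getJITInfo() { return 0; } };  // non-const API
  void use(const TM &T) {
    // Well-defined only because the underlying object was never defined
    // const; where a const interface exists, preferring it (as r172483
    // below does for Hexagon) is the cleaner fix.
    MipsLikeTM &M = static_cast<MipsLikeTM &>(const_cast<TM &>(T));
    (void)M.getJITInfo();
  }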
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172472 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 1f1b334..415dee3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -672,12 +672,21 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + else if (TheTriple.getArchName() == "armv6m" || + TheTriple.getArchName() == "thumbv6m") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6M); + else if (TheTriple.getArchName() == "armv7em" || + TheTriple.getArchName() == "thumbv7em") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7EM); else if (TheTriple.getArchName() == "armv7f" || TheTriple.getArchName() == "thumbv7f") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); else if (TheTriple.getArchName() == "armv7k" || TheTriple.getArchName() == "thumbv7k") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); + else if (TheTriple.getArchName() == "armv7m" || + TheTriple.getArchName() == "thumbv7m") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7M); else if (TheTriple.getArchName() == "armv7s" || TheTriple.getArchName() == "thumbv7s") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); -- cgit v1.1 From 19d54337169ae4af2d44ae39664d0bac1ae0309c Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Mon, 14 Jan 2013 21:34:09 +0000 Subject: Follow up of commit r172472. Refactor the big if/else sequence into one string switch for ARM subtype selection. 
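(An illustrative aside, not part of the patch: for readers new to the idiom adopted below, a minimal standalone llvm::StringSwitch, with made-up values:)

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"
  unsigned subtypeFor(llvm::StringRef Arch) {
    return llvm::StringSwitch<unsigned>(Arch)
        .Cases("armv6", "thumbv6", 6)   // several keys mapping to one value
        .Case("armv7s", 11)             // a single key
        .Default(7);                    // the fallthrough result
  }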
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172475 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 43 ++++++++++----------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 415dee3..e66e985 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -663,34 +664,20 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - if (TheTriple.getArchName() == "armv4t" || - TheTriple.getArchName() == "thumbv4t") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T); - else if (TheTriple.getArchName() == "armv5e" || - TheTriple.getArchName() == "thumbv5e") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ); - else if (TheTriple.getArchName() == "armv6" || - TheTriple.getArchName() == "thumbv6") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); - else if (TheTriple.getArchName() == "armv6m" || - TheTriple.getArchName() == "thumbv6m") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6M); - else if (TheTriple.getArchName() == "armv7em" || - TheTriple.getArchName() == "thumbv7em") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7EM); - else if (TheTriple.getArchName() == "armv7f" || - TheTriple.getArchName() == "thumbv7f") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); - else if (TheTriple.getArchName() == "armv7k" || - TheTriple.getArchName() == "thumbv7k") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); - else if (TheTriple.getArchName() == "armv7m" || - TheTriple.getArchName() == "thumbv7m") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7M); - else if (TheTriple.getArchName() == "armv7s" || - TheTriple.getArchName() == "thumbv7s") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); + object::mach::CPUSubtypeARM CS = + StringSwitch(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) + .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) + .Cases("armv6", "thumbv6", object::mach::CSARM_V6) + .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) + .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) + .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) + .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) + .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) + .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) + .Default(object::mach::CSARM_V7); + + return new DarwinARMAsmBackend(T, TT, CS); } if (TheTriple.isOSWindows()) -- cgit v1.1 From 89e88e30bff4a5f4303dc9e44d3faa89b81af5a8 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 14 Jan 2013 22:04:38 +0000 Subject: This patch addresses an incorrect transformation in the DAG combiner. The included test case is derived from one of the GCC compatibility tests. The problem arises after the selection DAG has been converted to type-legalized form. 
The combiner first sees a 64-bit load that can be converted into a pre-increment form. The original load feeds into a SRL that isolates the upper 32 bits of the loaded doubleword. This looks like an opportunity for DAGCombiner::ReduceLoadWidth() to replace the 64-bit load with a 32-bit load. However, this transformation is not valid, as the replacement load is not a pre-increment load. The pre-increment load produces an extra result, which feeds a subsequent add instruction. The replacement load only has one result value, and this value is propagated to all uses of the pre- increment load, including the add. Because the add is looking for the second result value as its operand, it ends up attempting to add a constant to a token chain, resulting in a crash. So the patch simply disables this transformation for any load with more than two result values. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 359c4cf..a82410a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5100,16 +5100,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // If we haven't found a load, we can't narrow it. Don't transform one with // multiple uses, this would require adding a new load. - if (!isa(N0) || !N0.hasOneUse() || - // Don't change the width of a volatile load. - cast(N0)->isVolatile()) + if (!isa(N0) || !N0.hasOneUse()) + return SDValue(); + + // Don't change the width of a volatile load. + LoadSDNode *LN0 = cast(N0); + if (LN0->isVolatile()) return SDValue(); // Verify that we are actually reducing a load width here. - if (cast(N0)->getMemoryVT().getSizeInBits() < EVTBits) + if (LN0->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. 
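  // Illustrative aside, not part of the patch: an ordinary LoadSDNode
  // produces exactly two results -- the loaded value and the chain -- while
  // a pre/post-indexed load produces a third, the updated base pointer.
  // That third result is what the getNumValues() > 2 test below rejects.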
+ if (LN0->getNumValues() > 2) return SDValue(); - LoadSDNode *LN0 = cast(N0); EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) -- cgit v1.1 From 953cbfcd26fa59d80c8d9ca749b5dd8ef901d11a Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Mon, 14 Jan 2013 22:08:37 +0000 Subject: Improve r172471: avoid all those extra casts on the lines nearby git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCodeEmitter.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 8efcd3c..a24de60 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -120,10 +120,12 @@ class MipsCodeEmitter : public MachineFunctionPass { char MipsCodeEmitter::ID = 0; bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - JTI = const_cast( - static_cast(MF.getTarget())).getJITInfo(); - II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo(); - TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout(); + MipsTargetMachine &Target = static_cast( + const_cast(MF.getTarget())); + + JTI = Target.getJITInfo(); + II = Target.getInstrInfo(); + TD = Target.getDataLayout(); Subtarget = &TM.getSubtarget (); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; -- cgit v1.1 From 510db8bcb959fa69a93c42b58cb3e0ab28d03825 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Mon, 14 Jan 2013 22:18:18 +0000 Subject: Improve r172468: const_cast is not needed here git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172483 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 4c27d35..1a0e581 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1016,9 +1016,8 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, DebugLoc dl = Op.getDebugLoc(); Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); - HexagonTargetObjectFile &TLOF = - static_cast( - const_cast(getObjFileLowering())); + const HexagonTargetObjectFile &TLOF = + static_cast(getObjFileLowering()); if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); } -- cgit v1.1 From dd2e8950222ab74157b1c083ffa77b0fbaf1d210 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Jan 2013 22:31:35 +0000 Subject: [ms-inline asm] Extend support for parsing Intel bracketed memory operands that have an arbitrary ordering of the base register, index register and displacement. 
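(An illustrative aside, not part of the patch: the state machine introduced below normalizes the bracketed components regardless of order, so inputs like the following, which are made-up examples rather than the commit's tests, all yield base=ebx, index=esi, scale=4, disp=16:)

  //   mov eax, dword ptr [ebx + esi*4 + 16]
  //   mov eax, dword ptr [esi*4 + 16 + ebx]
  //   mov eax, dword ptr [16 + ebx + esi*4]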
rdar://12527141 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172484 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 363 +++++++++++++++++++++++------- 1 file changed, 277 insertions(+), 86 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 5ce258e..05bb1e3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -684,115 +684,298 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } +enum IntelBracExprState { + IBES_START, + IBES_LBRAC, + IBES_RBRAC, + IBES_REGISTER, + IBES_REGISTER_STAR, + IBES_REGISTER_STAR_INTEGER, + IBES_INTEGER, + IBES_INTEGER_STAR, + IBES_INDEX_REGISTER, + IBES_IDENTIFIER, + IBES_DISP_EXPR, + IBES_MINUS, + IBES_ERROR +}; + +class IntelBracExprStateMachine { + IntelBracExprState State; + unsigned BaseReg, IndexReg, Scale; + int64_t Disp; + + unsigned TmpReg; + int64_t TmpInteger; + + bool isPlus; + +public: + IntelBracExprStateMachine(MCAsmParser &parser) : + State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0), + TmpReg(0), TmpInteger(0), isPlus(true) {} + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + int64_t getDisp() { return Disp; } + bool isValidEndState() { return State == IBES_RBRAC; } + + void onPlus() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_INTEGER: + State = IBES_START; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_START; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_START; + break; + } + isPlus = true; + } + void onMinus() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_MINUS; + break; + case IBES_INTEGER: + State = IBES_START; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_START; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_START; + break; + } + isPlus = false; + } + void onRegister(unsigned Reg) { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_REGISTER; + TmpReg = Reg; + break; + case IBES_INTEGER_STAR: + assert (!IndexReg && "IndexReg already set!"); + State = IBES_INDEX_REGISTER; + IndexReg = Reg; + Scale = TmpInteger; + break; + } + } + void onDispExpr() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_DISP_EXPR; + break; + } + } + void onInteger(int64_t TmpInt) { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_INTEGER; + TmpInteger = TmpInt; + break; + case IBES_MINUS: + State = IBES_INTEGER; + TmpInteger = TmpInt; + break; + case IBES_REGISTER_STAR: + assert (!IndexReg && "IndexReg already set!"); + State = IBES_INDEX_REGISTER; + IndexReg = TmpReg; + Scale = TmpInt; + break; + } + } + void onStar() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_INTEGER: + State = IBES_INTEGER_STAR; + break; + case IBES_REGISTER: + State = IBES_REGISTER_STAR; + break; + } + } + void onLBrac() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_RBRAC: + State = IBES_START; + isPlus = true; + break; + } + } + void onRBrac() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_DISP_EXPR: + State = IBES_RBRAC; + break; + case IBES_INTEGER: + State = IBES_RBRAC; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_RBRAC; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_RBRAC; + break; + } + } +}; + X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { - unsigned BaseReg = 0, IndexReg = 0, Scale = 1; const AsmToken &Tok = Parser.getTok(); SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); - // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] - // Eat '[' if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); + unsigned TmpReg = 0; + + // Try to handle '[' 'symbol' ']' if (getLexer().is(AsmToken::Identifier)) { - // Parse BaseReg - if (ParseRegister(BaseReg, Start, End)) { - // Handle '[' 'symbol' ']' - if (getParser().ParseExpression(Disp, End)) return 0; + if (ParseRegister(TmpReg, Start, End)) { + const MCExpr *Disp; + if (getParser().ParseExpression(Disp, End)) + return 0; + if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); End = Parser.getTok().getEndLoc(); Parser.Lex(); return X86Operand::CreateMem(Disp, Start, End, Size); } - } else if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Tok.getIntVal(); - Parser.Lex(); - SMLoc Loc = Tok.getLoc(); - if (getLexer().is(AsmToken::RBrac)) { - // Handle '[' number ']' - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); - if (SegReg) - return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, - Start, End, Size); - return X86Operand::CreateMem(Disp, Start, End, Size); - } else if (getLexer().is(AsmToken::Star)) { - // Handle '[' Scale*IndexReg ']' - Parser.Lex(); - SMLoc IdxRegLoc = Tok.getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); - Scale = Val; - } else - return ErrorOperand(Loc, "Unexpected token"); } - // Parse ][ as a plus. - bool ExpectRBrac = true; - if (getLexer().is(AsmToken::RBrac)) { - ExpectRBrac = false; - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - } + // Parse [ BaseReg + Scale*IndexReg + Disp ]. + bool Done = false; + IntelBracExprStateMachine SM(Parser); + + // If we parsed a register, then the end loc has already been set and + // the identifier has already been lexed. We also need to update the + // state. + if (TmpReg) + SM.onRegister(TmpReg); + + const MCExpr *Disp = 0; + while (!Done) { + bool UpdateLocLex = true; - if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || - getLexer().is(AsmToken::LBrac)) { - ExpectRBrac = true; - bool isPlus = getLexer().is(AsmToken::Plus) || - getLexer().is(AsmToken::LBrac); - Parser.Lex(); - SMLoc PlusLoc = Tok.getLoc(); - if (getLexer().is(AsmToken::Integer)) { + // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an + // identifier. Don't try an parse it as a register. + if (Tok.getString().startswith(".")) + break; + + switch (getLexer().getKind()) { + default: { + if (SM.isValidEndState()) { + Done = true; + break; + } + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + } + case AsmToken::Identifier: { + // This could be a register or a displacement expression. 
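  // Illustrative aside, not part of the patch: the ordering below is
  // deliberate -- an identifier is first tried as a register name, and only
  // if that fails is it parsed as a general displacement expression, so
  // register names never reach the expression parser.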
+ if(!ParseRegister(TmpReg, Start, End)) { + SM.onRegister(TmpReg); + UpdateLocLex = false; + break; + } else if (!getParser().ParseExpression(Disp, End)) { + SM.onDispExpr(); + UpdateLocLex = false; + break; + } + return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + } + case AsmToken::Integer: { int64_t Val = Tok.getIntVal(); - Parser.Lex(); - if (getLexer().is(AsmToken::Star)) { - Parser.Lex(); - SMLoc IdxRegLoc = Tok.getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); - Scale = Val; - } else if (getLexer().is(AsmToken::RBrac)) { - const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); - Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); - } else - return ErrorOperand(PlusLoc, "unexpected token after +"); - } else if (getLexer().is(AsmToken::Identifier)) { - // This could be an index register or a displacement expression. - if (!IndexReg) - ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) - return 0; + SM.onInteger(Val); + break; } - } - - // Parse ][ as a plus. - if (getLexer().is(AsmToken::RBrac)) { - ExpectRBrac = false; - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - if (getLexer().is(AsmToken::LBrac)) { - ExpectRBrac = true; - Parser.Lex(); - if (getParser().ParseExpression(Disp, End)) - return 0; + case AsmToken::Plus: SM.onPlus(); break; + case AsmToken::Minus: SM.onMinus(); break; + case AsmToken::Star: SM.onStar(); break; + case AsmToken::LBrac: SM.onLBrac(); break; + case AsmToken::RBrac: SM.onRBrac(); break; + } + if (!Done && UpdateLocLex) { + End = Tok.getLoc(); + Parser.Lex(); // Consume the token. } - } else if (ExpectRBrac) { - if (getParser().ParseExpression(Disp, End)) - return 0; } - if (ExpectRBrac) { - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(End, "expected ']' token!"); - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - } + if (!Disp) + Disp = MCConstantExpr::Create(SM.getDisp(), getContext()); // Parse the dot operator (e.g., [ebx].foo.bar). if (Tok.getString().startswith(".")) { @@ -806,10 +989,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, Disp = NewDisp; } + int BaseReg = SM.getBaseReg(); + int IndexReg = SM.getIndexReg(); + // handle [-42] - if (!BaseReg && !IndexReg) - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(Disp, Start, End); + else + return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + } + int Scale = SM.getScale(); return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, End, Size); } -- cgit v1.1 From 7d72cf892ec745d916af34cf9e68703010b4ded8 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Mon, 14 Jan 2013 22:48:41 +0000 Subject: This change is to implement following rules under the condition C_A and/or C_R --------------------------------------------------------------------------- C_A: reassociation is allowed C_R: reciprocal of a constant C is appropriate, which means - 1/C is exact, or - reciprocal is allowed and 1/C is neither a special value nor a denormal. 
----------------------------------------------------------------------------- rule1: (X/C1) / C2 => X / (C2*C1) (if C_A) => X * (1/(C2*C1)) (if C_A && C_R) rule 2: X*C1 / C2 => X * (C1/C2) if C_A rule 3: (X/Y)/Z = > X/(Y*Z) (if C_A && at least one of Y and Z is symbolic value) rule 4: Z/(X/Y) = > (Z*Y)/X (similar to rule3) rule 5: C1/(X*C2) => (C1/C2) / X (if C_A) rule 6: C1/(X/C2) => (C1*C2) / X (if C_A) rule 7: C1/(C2/X) => (C1/C2) * X (if C_A) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172488 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineMulDivRem.cpp | 135 +++++++++++++++++++-- 1 file changed, 127 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d0f4392..29846c1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -784,21 +784,140 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return 0; } +/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special +/// FP value and: +/// 1) 1/C is exact, or +/// 2) reciprocal is allowed. +/// If the convertion was successful, the simplified expression "X * 1/C" is +/// returned; otherwise, NULL is returned. +/// +static Instruction *CvtFDivConstToReciprocal(Value *Dividend, + ConstantFP *Divisor, + bool AllowReciprocal) { + const APFloat &FpVal = Divisor->getValueAPF(); + APFloat Reciprocal(FpVal.getSemantics()); + bool Cvt = FpVal.getExactInverse(&Reciprocal); + + if (!Cvt && AllowReciprocal && FpVal.isNormal()) { + Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); + (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); + Cvt = !Reciprocal.isDenormal(); + } + + if (!Cvt) + return 0; + + ConstantFP *R; + R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Dividend, R); +} + Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + bool AllowReassociate = I.hasUnsafeAlgebra(); + bool AllowReciprocal = I.hasAllowReciprocal(); + if (ConstantFP *Op1C = dyn_cast(Op1)) { - const APFloat &Op1F = Op1C->getValueAPF(); - - // If the divisor has an exact multiplicative inverse we can turn the fdiv - // into a cheaper fmul. 
- APFloat Reciprocal(Op1F.getSemantics()); - if (Op1F.getExactInverse(&Reciprocal)) { - ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); - return BinaryOperator::CreateFMul(Op0, RFP); + if (AllowReassociate) { + ConstantFP *C1 = 0; + ConstantFP *C2 = Op1C; + Value *X; + Instruction *Res = 0; + + if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) { + // (X*C1)/C2 => X * (C1/C2) + // + Constant *C = ConstantExpr::getFDiv(C1, C2); + const APFloat &F = cast(C)->getValueAPF(); + if (F.isNormal() && !F.isDenormal()) + Res = BinaryOperator::CreateFMul(X, C); + } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) { + // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed] + // + Constant *C = ConstantExpr::getFMul(C1, C2); + const APFloat &F = cast(C)->getValueAPF(); + if (F.isNormal() && !F.isDenormal()) { + Res = CvtFDivConstToReciprocal(X, cast(C), + AllowReciprocal); + if (!Res) + Res = BinaryOperator::CreateFDiv(X, C); + } + } + + if (Res) { + Res->setFastMathFlags(I.getFastMathFlags()); + return Res; + } + } + + // X / C => X * 1/C + if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal)) + return T; + + return 0; + } + + if (AllowReassociate && isa(Op0)) { + ConstantFP *C1 = cast(Op0), *C2; + Constant *Fold = 0; + Value *X; + bool CreateDiv = true; + + // C1 / (X*C2) => (C1/C2) / X + if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2)))) + Fold = ConstantExpr::getFDiv(C1, C2); + else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) { + // C1 / (X/C2) => (C1*C2) / X + Fold = ConstantExpr::getFMul(C1, C2); + } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) { + // C1 / (C2/X) => (C1/C2) * X + Fold = ConstantExpr::getFDiv(C1, C2); + CreateDiv = false; + } + + if (Fold) { + const APFloat &FoldC = cast(Fold)->getValueAPF(); + if (FoldC.isNormal() && !FoldC.isDenormal()) { + Instruction *R = CreateDiv ? + BinaryOperator::CreateFDiv(Fold, X) : + BinaryOperator::CreateFMul(X, Fold); + R->setFastMathFlags(I.getFastMathFlags()); + return R; + } + } + return 0; + } + + if (AllowReassociate) { + Value *X, *Y; + Value *NewInst = 0; + Instruction *SimpR = 0; + + if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { + // (X/Y) / Z => X / (Y*Z) + // + if (!isa(Y) || !isa(Op1)) { + NewInst = Builder->CreateFMul(Y, Op1); + SimpR = BinaryOperator::CreateFDiv(X, NewInst); + } + } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) { + // Z / (X/Y) => Z*Y / X + // + if (!isa(Y) || !isa(Op0)) { + NewInst = Builder->CreateFMul(Op0, Y); + SimpR = BinaryOperator::CreateFDiv(NewInst, X); + } + } + + if (NewInst) { + if (Instruction *T = dyn_cast(NewInst)) + T->setDebugLoc(I.getDebugLoc()); + SimpR->setFastMathFlags(I.getFastMathFlags()); + return SimpR; } } -- cgit v1.1 From e673b54bdde4b538cbd67eadac80a15d238c926f Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Mon, 14 Jan 2013 23:16:36 +0000 Subject: Remove trailing spaces. 
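(An illustrative aside on r172488 above, not part of any patch: a worked instance of rule 1, assuming the unsafe-algebra and allow-reciprocal fast-math flags are both set:)

  double f(double X) { return (X / 0.5) / 4.0; }
  // C_A reassociates to X / (4.0 * 0.5) = X / 2.0; since 1/2.0 is exact in
  // binary floating point, C_R holds and the result is the multiply X * 0.5.
  // Had the folded constant been 10.0 instead, 1/10.0 is inexact, so the
  // multiply form would be emitted only because reciprocals are allowed,
  // and only if 1/10.0 is neither a special value nor a denormal.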
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombine.h | 50 ++++++++-------- lib/Transforms/InstCombine/InstCombineWorklist.h | 30 +++++----- lib/Transforms/Utils/BasicBlockUtils.cpp | 76 ++++++++++++------------ 3 files changed, 78 insertions(+), 78 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index a36b1e6..1f6a3a5e 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -27,7 +27,7 @@ namespace llvm { class DbgDeclareInst; class MemIntrinsic; class MemSetInst; - + /// SelectPatternFlavor - We can match a variety of different patterns for /// select operations. enum SelectPatternFlavor { @@ -36,7 +36,7 @@ enum SelectPatternFlavor { SPF_SMAX, SPF_UMAX //SPF_ABS - TODO. }; - + /// getComplexity: Assign a complexity or rank value to LLVM Values... /// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static inline unsigned getComplexity(Value *V) { @@ -51,23 +51,23 @@ static inline unsigned getComplexity(Value *V) { return isa(V) ? (isa(V) ? 0 : 1) : 2; } - + /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. -class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter { InstCombineWorklist &Worklist; public: InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const { IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); Worklist.Add(I); } }; - + /// InstCombiner - The -instcombine pass. class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, @@ -85,7 +85,7 @@ public: /// instructions into the worklist when they are created. typedef IRBuilder BuilderTy; BuilderTy *Builder; - + static char ID; // Pass identification, replacement for typeid InstCombiner() : FunctionPass(ID), TD(0), Builder(0) { MinimizeSize = false; @@ -94,7 +94,7 @@ public: public: virtual bool runOnFunction(Function &F); - + bool DoOneIteration(Function &F, unsigned ItNum); virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -212,10 +212,10 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; - Type *FindElementAtOffset(Type *Ty, int64_t Offset, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually /// results in any code being generated and is interesting to optimize out. If /// the cast can be eliminated by some other simple transformation, we prefer @@ -247,7 +247,7 @@ public: return New; } - // InsertNewInstWith - same as InsertNewInstBefore, but also sets the + // InsertNewInstWith - same as InsertNewInstBefore, but also sets the // debug loc. // Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { @@ -263,10 +263,10 @@ public: // Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. 
- + // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. - if (&I == V) + if (&I == V) V = UndefValue::get(I.getType()); DEBUG(errs() << "IC: Replacing " << I << "\n" @@ -296,13 +296,13 @@ public: MadeIRChange = true; return 0; // Don't do anything with FI } - + void ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth = 0) const { return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); } - - bool MaskedValueIsZero(Value *V, const APInt &Mask, + + bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0) const { return llvm::MaskedValueIsZero(V, Mask, TD, Depth); } @@ -325,10 +325,10 @@ private: /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt& KnownZero, APInt& KnownOne, unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt& KnownZero, APInt& KnownOne, unsigned Depth=0); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded @@ -336,15 +336,15 @@ private: Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne); - + /// SimplifyDemandedInstructionBits - Inst is an integer instruction that /// SimplifyDemandedBits knows about. See if the instruction has any /// properties that allow us to simplify its operands. bool SimplifyDemandedInstructionBits(Instruction &Inst); - + Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt& UndefElts, unsigned Depth = 0); - + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select // which has a PHI node as operand #0, see if we can fold the instruction // into the PHI (which is only possible if all operands to the PHI are @@ -360,10 +360,10 @@ private: Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - + Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); - + Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, @@ -382,8 +382,8 @@ private: Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; - - + + } // end namespace llvm. #endif diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 57ed9e3..49efce5 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -19,20 +19,20 @@ #include "llvm/Support/raw_ostream.h" namespace llvm { - + /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector Worklist; DenseMap WorklistMap; - + void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION; InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION; public: InstCombineWorklist() {} - + bool isEmpty() const { return Worklist.empty(); } - + /// Add - Add the specified instruction to the worklist if it isn't already /// in it. 
void Add(Instruction *I) { @@ -41,12 +41,12 @@ public: Worklist.push_back(I); } } - + void AddValue(Value *V) { if (Instruction *I = dyn_cast(V)) Add(I); } - + /// AddInitialGroup - Add the specified batch of stuff in reverse order. /// which should only be done when the worklist is empty and when the group /// has no duplicates. @@ -61,25 +61,25 @@ public: Worklist.push_back(I); } } - + // Remove - remove I from the worklist if it exists. void Remove(Instruction *I) { DenseMap::iterator It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. - + // Don't bother moving everything down, just null out the slot. Worklist[It->second] = 0; - + WorklistMap.erase(It); } - + Instruction *RemoveOne() { Instruction *I = Worklist.back(); Worklist.pop_back(); WorklistMap.erase(I); return I; } - + /// AddUsersToWorkList - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. @@ -89,18 +89,18 @@ public: UI != UE; ++UI) Add(cast(*UI)); } - - + + /// Zap - check that the worklist is empty and nuke the backing store for /// the map if it is large. void Zap() { assert(WorklistMap.empty() && "Worklist empty, but map not?"); - + // Do an explicit clear, this shrinks the map if needed. WorklistMap.clear(); } }; - + } // end namespace llvm. #endif diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 8330e84..ba99d2e 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -37,12 +37,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { // Can delete self loop. BB->getSinglePredecessor() == BB) && "Block is not dead!"); TerminatorInst *BBTerm = BB->getTerminator(); - + // Loop through all of our successors and make sure they know that one // of their predecessors is going away. for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) BBTerm->getSuccessor(i)->removePredecessor(BB); - + // Zap all the instructions in the block. while (!BB->empty()) { Instruction &I = BB->back(); @@ -55,7 +55,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { I.replaceAllUsesWith(UndefValue::get(I.getType())); BB->getInstList().pop_back(); } - + // Zap the block! BB->eraseFromParent(); } @@ -66,25 +66,25 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// when the block has exactly one predecessor. void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { if (!isa(BB->begin())) return; - + AliasAnalysis *AA = 0; MemoryDependenceAnalysis *MemDep = 0; if (P) { AA = P->getAnalysisIfAvailable(); MemDep = P->getAnalysisIfAvailable(); } - + while (PHINode *PN = dyn_cast(BB->begin())) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); else PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - + if (MemDep) MemDep->removeInstruction(PN); // Memdep updates AA itself. else if (AA && isa(PN->getType())) AA->deleteValue(PN); - + PN->eraseFromParent(); } } @@ -115,7 +115,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Don't merge away blocks who have their address taken. if (BB->hasAddressTaken()) return false; - + // Can't merge if there are multiple predecessors, or no predecessors. BasicBlock *PredBB = BB->getUniquePredecessor(); if (!PredBB) return false; @@ -124,7 +124,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { if (PredBB == BB) return false; // Don't break invokes. 
if (isa(PredBB->getTerminator())) return false; - + succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) @@ -132,7 +132,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { OnlySucc = 0; // There are multiple distinct successors! break; } - + // Can't merge if there are multiple successors. if (!OnlySucc) return false; @@ -149,21 +149,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Begin by getting rid of unneeded PHIs. if (isa(BB->front())) FoldSingleEntryPHINodes(BB, P); - + // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); - + // Make all PHI nodes that referred to BB now refer to Pred as their // source... BB->replaceAllUsesWith(PredBB); - + // Move all definitions in the successor to the predecessor... PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); - + // Inherit predecessors name if it exists. if (!PredBB->hasName()) PredBB->takeName(BB); - + // Finally, erase the old block and update dominator info. if (P) { if (DominatorTree *DT = P->getAnalysisIfAvailable()) { @@ -176,16 +176,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { DT->eraseNode(BB); } - + if (LoopInfo *LI = P->getAnalysisIfAvailable()) LI->removeBlock(BB); - + if (MemoryDependenceAnalysis *MD = P->getAnalysisIfAvailable()) MD->invalidateCachedPredecessors(); } } - + BB->eraseFromParent(); return true; } @@ -251,11 +251,11 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { } } -/// SplitEdge - Split the edge connecting specified block. Pass P must -/// not be NULL. +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); - + // If this is a critical edge, let SplitCriticalEdge do it. TerminatorInst *LatchTerm = BB->getTerminator(); if (SplitCriticalEdge(LatchTerm, SuccNum, P)) @@ -271,11 +271,11 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { SP = NULL; return SplitBlock(Succ, Succ->begin(), P); } - + // Otherwise, if BB has a single successor, split it at the bottom of the // block. assert(BB->getTerminator()->getNumSuccessors() == 1 && - "Should have a single succ!"); + "Should have a single succ!"); return SplitBlock(BB, BB->getTerminator(), P); } @@ -301,12 +301,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { if (DomTreeNode *OldNode = DT->getNode(Old)) { std::vector Children; for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); - I != E; ++I) + I != E; ++I) Children.push_back(*I); DomTreeNode *NewNode = DT->addNewBlock(New,Old); for (std::vector::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) + E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); } } @@ -424,7 +424,7 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); - + // Move all of the PHI values for 'Preds' to the new PHI. 
for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); @@ -451,16 +451,16 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// preserve LoopSimplify (because it's complicated to handle the case where one /// of the edges being split is an exit of a loop with other exits). /// -BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); - + // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); - + // Move the edges from Preds to point to NewBB instead of BB. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement @@ -497,13 +497,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, /// block gets the remaining predecessors of OrigBB. The landingpad instruction /// OrigBB is clone into both of the new basic blocks. The new blocks are given /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. -/// +/// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, /// it does not preserve LoopSimplify (because it's complicated to handle the /// case where one of the edges being split is an exit of a loop with other /// exits). -/// +/// void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef Preds, const char *Suffix1, const char *Suffix2, @@ -608,11 +608,11 @@ void llvm::FindFunctionBackedges(const Function &F, const BasicBlock *BB = &F.getEntryBlock(); if (succ_begin(BB) == succ_end(BB)) return; - + SmallPtrSet Visited; SmallVector, 8> VisitStack; SmallPtrSet InStack; - + Visited.insert(BB); VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); InStack.insert(BB); @@ -620,7 +620,7 @@ void llvm::FindFunctionBackedges(const Function &F, std::pair &Top = VisitStack.back(); const BasicBlock *ParentBB = Top.first; succ_const_iterator &I = Top.second; - + bool FoundNew = false; while (I != succ_end(ParentBB)) { BB = *I++; @@ -632,7 +632,7 @@ void llvm::FindFunctionBackedges(const Function &F, if (InStack.count(BB)) Result.push_back(std::make_pair(ParentBB, BB)); } - + if (FoundNew) { // Go down one level if there is a unvisited successor. InStack.insert(BB); @@ -641,7 +641,7 @@ void llvm::FindFunctionBackedges(const Function &F, // Go up one level. InStack.erase(VisitStack.pop_back_val().first); } - } while (!VisitStack.empty()); + } while (!VisitStack.empty()); } /// FoldReturnIntoUncondBranch - This method duplicates the specified return @@ -655,7 +655,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); Pred->getInstList().push_back(NewRet); - + // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); @@ -679,7 +679,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, } } } - + // Update any PHI nodes in the returning block to realize that we no // longer branch to them. 
BB->removePredecessor(Pred); -- cgit v1.1 From c0c67b03b03d73d3614a084d467a388c35d264d1 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 14 Jan 2013 23:22:36 +0000 Subject: Properly encapsulate additional methods and data from AsmParser. This finally allows AsmParser to no longer list GenericAsmParser as a friend. All member vars directly accessed by GenericAsmParser have been properly encapsulated and exposed through the MCAsmParser interface. This reduces the coupling between AsmParser and GenericAsmParser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 139 ++++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 67 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 45aaa2e..665d672 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -50,26 +50,12 @@ MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {} namespace { -/// \brief Helper class for tracking macro definitions. -typedef std::vector MacroArguments; -typedef std::pair MacroParameter; -typedef std::vector MacroParameters; - -struct Macro { - StringRef Name; - StringRef Body; - MacroParameters Parameters; - -public: - Macro(StringRef N, StringRef B, const MacroParameters &P) : - Name(N), Body(B), Parameters(P) {} -}; /// \brief Helper class for storing information about an active macro /// instantiation. struct MacroInstantiation { /// The macro being instantiated. - const Macro *TheMacro; + const MCAsmMacro *TheMacro; /// The macro instantiation with substitutions. MemoryBuffer *Instantiation; @@ -84,7 +70,7 @@ struct MacroInstantiation { SMLoc ExitLoc; public: - MacroInstantiation(const Macro *M, SMLoc IL, int EB, SMLoc EL, + MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, SMLoc EL, MemoryBuffer *I); }; @@ -115,8 +101,6 @@ struct ParseStatementInfo { /// \brief The concrete assembly parser instance. class AsmParser : public MCAsmParser { - friend class GenericAsmParser; - AsmParser(const AsmParser &) LLVM_DELETED_FUNCTION; void operator=(const AsmParser &) LLVM_DELETED_FUNCTION; private: @@ -144,7 +128,7 @@ private: StringMap > DirectiveMap; /// MacroMap - Map of currently defined macros. - StringMap MacroMap; + StringMap MacroMap; /// ActiveMacros - Stack of active macro instantiations. std::vector ActiveMacros; @@ -225,6 +209,9 @@ public: virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool ParseAbsoluteExpression(int64_t &Res); + bool ParseMacroArgument(MCAsmMacroArgument &MA, + AsmToken::TokenKind &ArgumentDelimiter); + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. 
virtual bool ParseIdentifier(StringRef &Res); @@ -233,6 +220,14 @@ public: virtual bool MacrosEnabled() {return MacrosEnabledFlag;} virtual void SetMacrosEnabled(bool flag) {MacrosEnabledFlag = flag;} + virtual const MCAsmMacro* LookupMacro(StringRef Name); + virtual void DefineMacro(StringRef Name, const MCAsmMacro& Macro); + virtual void UndefineMacro(StringRef Name); + + virtual bool InsideMacroInstantiation() {return !ActiveMacros.empty();} + virtual bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); + void HandleMacroExit(); + virtual void CheckForValidSection(); /// } @@ -242,12 +237,10 @@ private: void EatToEndOfLine(); bool ParseCppHashLineFilenameComment(const SMLoc &L); - bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); bool expandMacro(raw_svector_ostream &OS, StringRef Body, - const MacroParameters &Parameters, - const MacroArguments &A, + const MCAsmMacroParameters &Parameters, + const MCAsmMacroArguments &A, const SMLoc &L); - void HandleMacroExit(); void PrintMacroInstantiations(); void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, @@ -270,9 +263,7 @@ private: /// location. void JumpToLoc(SMLoc Loc, int InBuffer=-1); - bool ParseMacroArgument(MCAsmMacroArgument &MA, - AsmToken::TokenKind &ArgumentDelimiter); - bool ParseMacroArguments(const Macro *M, MacroArguments &A); + bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit @@ -359,8 +350,8 @@ private: MCSymbolRefExpr::VariantKind Variant); // Macro-like directives - Macro *ParseMacroLikeBody(SMLoc DirectiveLoc); - void InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc, + MCAsmMacro *ParseMacroLikeBody(SMLoc DirectiveLoc); + void InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, raw_svector_ostream &OS); bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept" bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" @@ -540,7 +531,7 @@ AsmParser::~AsmParser() { assert(ActiveMacros.empty() && "Unexpected active macro instantiation!"); // Destroy any macros. - for (StringMap::iterator it = MacroMap.begin(), + for (StringMap::iterator it = MacroMap.begin(), ie = MacroMap.end(); it != ie; ++it) delete it->getValue(); @@ -1278,8 +1269,9 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // If macros are enabled, check to see if this is a macro instantiation. if (MacrosEnabled()) - if (const Macro *M = MacroMap.lookup(IDVal)) - return HandleMacroEntry(IDVal, IDLoc, M); + if (const MCAsmMacro *M = LookupMacro(IDVal)) { + return HandleMacroEntry(M, IDLoc); + } // Otherwise, we have a normal instruction or directive. if (IDVal[0] == '.' 
&& IDVal != ".") { @@ -1590,8 +1582,8 @@ static bool isIdentifierChar(char c) { } bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, - const MacroParameters &Parameters, - const MacroArguments &A, + const MCAsmMacroParameters &Parameters, + const MCAsmMacroArguments &A, const SMLoc &L) { unsigned NParameters = Parameters.size(); if (NParameters != 0 && NParameters != A.size()) @@ -1690,7 +1682,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, return false; } -MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, +MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, SMLoc EL, MemoryBuffer *I) : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB), @@ -1807,7 +1799,7 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA, } // Parse the macro instantiation arguments. -bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) { +bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A) { const unsigned NParameters = M ? M->Parameters.size() : 0; // Argument delimiter is initially unknown. It will be set by // ParseMacroArgument() @@ -1851,14 +1843,30 @@ bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) { return TokError("Too many arguments"); } -bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, - const Macro *M) { +const MCAsmMacro* AsmParser::LookupMacro(StringRef Name) { + StringMap::iterator I = MacroMap.find(Name); + return (I == MacroMap.end()) ? NULL : I->getValue(); +} + +void AsmParser::DefineMacro(StringRef Name, const MCAsmMacro& Macro) { + MacroMap[Name] = new MCAsmMacro(Macro); +} + +void AsmParser::UndefineMacro(StringRef Name) { + StringMap::iterator I = MacroMap.find(Name); + if (I != MacroMap.end()) { + delete I->getValue(); + MacroMap.erase(I); + } +} + +bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate // this, although we should protect against infinite loops. if (ActiveMacros.size() == 20) return TokError("macros cannot be nested more than 20 levels deep"); - MacroArguments A; + MCAsmMacroArguments A; if (ParseMacroArguments(M, A)) return true; @@ -1877,7 +1885,7 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc())) return true; - // We include the .endmacro in the buffer as our queue to exit the macro + // We include the .endmacro in the buffer as our cue to exit the macro // instantiation. OS << ".endmacro\n"; @@ -3498,13 +3506,13 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, if (getParser().ParseIdentifier(Name)) return TokError("expected identifier in '.macro' directive"); - MacroParameters Parameters; + MCAsmMacroParameters Parameters; // Argument delimiter is initially unknown. 
It will be set by // ParseMacroArgument() AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof; if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { - MacroParameter Parameter; + MCAsmMacroParameter Parameter; if (getParser().ParseIdentifier(Parameter.first)) return TokError("expected identifier in '.macro' directive"); @@ -3550,14 +3558,14 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, getParser().EatToEndOfStatement(); } - if (getParser().MacroMap.lookup(Name)) { + if (getParser().LookupMacro(Name)) { return Error(DirectiveLoc, "macro '" + Name + "' is already defined"); } const char *BodyStart = StartToken.getLoc().getPointer(); const char *BodyEnd = EndToken.getLoc().getPointer(); StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); - getParser().MacroMap[Name] = new Macro(Name, Body, Parameters); + getParser().DefineMacro(Name, MCAsmMacro(Name, Body, Parameters)); return false; } @@ -3571,7 +3579,7 @@ bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, // If we are inside a macro instantiation, terminate the current // instantiation. - if (!getParser().ActiveMacros.empty()) { + if (getParser().InsideMacroInstantiation()) { getParser().HandleMacroExit(); return false; } @@ -3593,13 +3601,10 @@ bool GenericAsmParser::ParseDirectivePurgeMacro(StringRef Directive, if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.purgem' directive"); - StringMap::iterator I = getParser().MacroMap.find(Name); - if (I == getParser().MacroMap.end()) + if (!getParser().LookupMacro(Name)) return Error(DirectiveLoc, "macro '" + Name + "' is not defined"); - // Undefine the macro. - delete I->getValue(); - getParser().MacroMap.erase(I); + getParser().UndefineMacro(Name); return false; } @@ -3622,7 +3627,7 @@ bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { return false; } -Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { +MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { AsmToken EndToken, StartToken = getTok(); unsigned NestLevel = 0; @@ -3663,11 +3668,11 @@ Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { // We Are Anonymous. StringRef Name; - MacroParameters Parameters; - return new Macro(Name, Body, Parameters); + MCAsmMacroParameters Parameters; + return new MCAsmMacro(Name, Body, Parameters); } -void AsmParser::InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc, +void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, raw_svector_ostream &OS) { OS << ".endr\n"; @@ -3703,15 +3708,15 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { Lex(); // Lex the rept definition. - Macro *M = ParseMacroLikeBody(DirectiveLoc); + MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc); if (!M) return true; // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. 
SmallString<256> Buf; - MacroParameters Parameters; - MacroArguments A; + MCAsmMacroParameters Parameters; + MCAsmMacroArguments A; raw_svector_ostream OS(Buf); while (Count--) { if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc())) @@ -3725,8 +3730,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { /// ParseDirectiveIrp /// ::= .irp symbol,values bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { - MacroParameters Parameters; - MacroParameter Parameter; + MCAsmMacroParameters Parameters; + MCAsmMacroParameter Parameter; if (ParseIdentifier(Parameter.first)) return TokError("expected identifier in '.irp' directive"); @@ -3738,7 +3743,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { Lex(); - MacroArguments A; + MCAsmMacroArguments A; if (ParseMacroArguments(0, A)) return true; @@ -3746,7 +3751,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { Lex(); // Lex the irp definition. - Macro *M = ParseMacroLikeBody(DirectiveLoc); + MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc); if (!M) return true; @@ -3755,8 +3760,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { SmallString<256> Buf; raw_svector_ostream OS(Buf); - for (MacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) { - MacroArguments Args; + for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) { + MCAsmMacroArguments Args; Args.push_back(*i); if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) @@ -3771,8 +3776,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { /// ParseDirectiveIrpc /// ::= .irpc symbol,values bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { - MacroParameters Parameters; - MacroParameter Parameter; + MCAsmMacroParameters Parameters; + MCAsmMacroParameter Parameter; if (ParseIdentifier(Parameter.first)) return TokError("expected identifier in '.irpc' directive"); @@ -3784,7 +3789,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); - MacroArguments A; + MCAsmMacroArguments A; if (ParseMacroArguments(0, A)) return true; @@ -3795,7 +3800,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); // Lex the irpc definition. - Macro *M = ParseMacroLikeBody(DirectiveLoc); + MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc); if (!M) return true; @@ -3810,7 +3815,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { MCAsmMacroArgument Arg; Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); - MacroArguments Args; + MCAsmMacroArguments Args; Args.push_back(Arg); if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) -- cgit v1.1 From bbe64fba4abbe65824587bd3ed9509df09cbc5e3 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Mon, 14 Jan 2013 23:43:18 +0000 Subject: Now that GenericAsmParser and AsmParser are no longer friends, GenericAsmParser can simply use the getParser method from MCAsmParserExtension, working through the MCAsmParser interface. There's no longer a need to overload that method to cast it to the concrete AsmParser.
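For a rough illustration of the pattern this cleanup leaves behind (MCAsmParserLike, ExtensionLike and MyExtension are invented stand-ins for the real MC classes, not LLVM code):

class MCAsmParserLike {
public:
  virtual ~MCAsmParserLike() {}
  virtual bool TokError(const char *Msg) = 0;
};

class ExtensionLike {
  MCAsmParserLike *TheParser;
public:
  ExtensionLike() : TheParser(0) {}
  void Initialize(MCAsmParserLike &Parser) { TheParser = &Parser; }
  // Returns the abstract interface; no concrete parser type is exposed.
  MCAsmParserLike &getParser() { return *TheParser; }
};

class MyExtension : public ExtensionLike {
public:
  bool parseSomething() {
    // Previously this needed a cast such as (AsmParser&)getParser();
    // once the extension only uses the interface, the base accessor suffices.
    return getParser().TokError("example diagnostic");
  }
};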
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 665d672..66d3bc7 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -375,10 +375,6 @@ class GenericAsmParser : public MCAsmParserExtension { public: GenericAsmParser() {} - AsmParser &getParser() { - return (AsmParser&) this->MCAsmParserExtension::getParser(); - } - virtual void Initialize(MCAsmParser &Parser) { // Call the base implementation. this->MCAsmParserExtension::Initialize(Parser); -- cgit v1.1 From 096d617796228293810cb0443c6617b33c5afdc5 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 15 Jan 2013 01:08:02 +0000 Subject: This patch fixes a Mips-specific bug where we need to generate an N64 compound relocation R_MIPS_GPREL_32/R_MIPS_64/R_MIPS_NONE. The bug was exposed by the SingleSource test case DuffsDevice.c. Contributor: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172496 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index f82e203..7afb77e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -108,7 +108,13 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, Type = ELF::R_MIPS_64; break; case FK_GPRel_4: - Type = ELF::R_MIPS_GPREL32; + if (isN64()) { + Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type); + Type = setRType2((unsigned)ELF::R_MIPS_64, Type); + Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type); + } + else + Type = ELF::R_MIPS_GPREL32; break; case Mips::fixup_Mips_GPREL16: Type = ELF::R_MIPS_GPREL16; -- cgit v1.1 From 8dd938ed179267b2a3a5206c78e89530967f45a5 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 15 Jan 2013 01:22:53 +0000 Subject: [IR] Add verifier support for llvm.module.flags. - Also, update the LangRef documentation on module flags to match the implementation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 4252764..5a4a5a7 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -200,6 +200,8 @@ namespace { E = M.named_metadata_end(); I != E; ++I) visitNamedMDNode(*I); + visitModuleFlags(M); + // If the module is broken, abort at this time. return abortIfBroken(); } @@ -240,6 +242,8 @@ namespace { void visitGlobalAlias(GlobalAlias &GA); void visitNamedMDNode(NamedMDNode &NMD); void visitMDNode(MDNode &MD, Function *F); + void visitModuleFlags(Module &M); + void visitModuleFlag(MDNode *Op, SmallSetVector &SeenIDs); void visitFunction(Function &F); void visitBasicBlock(BasicBlock &BB); using InstVisitor::visit; @@ -521,6 +525,59 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) { } } +void Verifier::visitModuleFlags(Module &M) { + const NamedMDNode *Flags = M.getModuleFlagsMetadata(); + if (!Flags) return; + + // Scan each flag.
+ SmallSetVector SeenIDs; + for (unsigned I = 0, E = Flags->getNumOperands(); I != E; ++I) { + visitModuleFlag(Flags->getOperand(I), SeenIDs); + } +} + +void Verifier::visitModuleFlag(MDNode *Op, + SmallSetVector &SeenIDs) { + // Each module flag should have three arguments, the merge behavior (a + // constant int), the flag ID (an MDString), and the value. + Assert1(Op->getNumOperands() == 3, + "incorrect number of operands in module flag", Op); + ConstantInt *Behavior = dyn_cast(Op->getOperand(0)); + MDString *ID = dyn_cast(Op->getOperand(1)); + Assert1(Behavior, + "invalid behavior operand in module flag (expected constant integer)", + Op->getOperand(0)); + unsigned BehaviorValue = Behavior->getZExtValue(); + Assert1((Module::Error <= BehaviorValue && + BehaviorValue <= Module::Override), + "invalid behavior operand in module flag (unexpected constant)", + Op->getOperand(0)); + Assert1(ID, + "invalid ID operand in module flag (expected metadata string)", + Op->getOperand(1)); + + // Unless this is a "requires" flag, check the ID is unique. + if (BehaviorValue != Module::Require) { + Assert1(SeenIDs.insert(ID), + "module flag identifiers must be unique (or of 'require' type)", + ID); + } + + // If this is a "requires" flag, sanity check the value. + if (BehaviorValue == Module::Require) { + // The value should itself be an MDNode with two operands, a flag ID (an + // MDString), and a value. + MDNode *Value = dyn_cast(Op->getOperand(2)); + Assert1(Value && Value->getNumOperands() == 2, + "invalid value for 'require' module flag (expected metadata pair)", + Op->getOperand(2)); + Assert1(isa(Value->getOperand(0)), + ("invalid value for 'require' module flag " + "(first value operand should be a string)"), + Value->getOperand(0)); + } +} + // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. void Verifier::VerifyParameterAttrs(Attribute Attrs, Type *Ty, -- cgit v1.1 From ac97f5ce486d1ca2967607028eacddd860aaddd0 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 15 Jan 2013 07:44:25 +0000 Subject: [Object][ELF] Simplify ELFObjectFile by using ELFType. This simplifies the usage and implementation of ELFObjectFile by using ELFType to replace the (target_endianness, max_alignment, is64Bits) template parameter triple with a single type parameter. This does complicate the base ELF types, as they must now use template template parameters to partially specialize for the 32- and 64-bit cases. However, these are only defined once.
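To make the parameter folding concrete, here is a minimal self-contained sketch of the idea; ELFTypeSketch and ELFObjectFileSketch are hypothetical names, not the real llvm::object classes:

template <int TargetEndianness, unsigned MaxAlignment, bool Is64Bits>
struct ELFTypeSketch {
  static const int Endianness = TargetEndianness;
  static const unsigned MaxAlign = MaxAlignment;
  static const bool Is64 = Is64Bits;
};

// Before: every dependent class carried three template parameters.
// After: a single ELFT parameter carries the whole configuration.
template <class ELFT>
class ELFObjectFileSketch {
public:
  bool is64Bits() const { return ELFT::Is64; }
  unsigned maxAlignment() const { return ELFT::MaxAlign; }
};

int main() {
  // For example, a little-endian (0), 8-byte-aligned, 64-bit configuration.
  ELFObjectFileSketch<ELFTypeSketch<0, 8, true> > Obj;
  return Obj.is64Bits() ? 0 : 1;
}

The diffs below apply exactly this collapse to DyldELFObject, ELFObjectImage and ELFObjectFile.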
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172515 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 82 +++++++++++----------- lib/Object/ELFObjectFile.cpp | 16 ++--- 2 files changed, 49 insertions(+), 49 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 1524b48..b8537b1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -28,8 +28,6 @@ using namespace llvm; using namespace llvm::object; -using support::endianness; - namespace { static inline @@ -40,22 +38,22 @@ error_code check(error_code Err) { return Err; } -template +template class DyldELFObject - : public ELFObjectFile { - LLVM_ELF_IMPORT_TYPES(target_endianness, max_alignment, is64Bits) + : public ELFObjectFile { + LLVM_ELF_IMPORT_TYPES(ELFT) - typedef Elf_Shdr_Impl Elf_Shdr; - typedef Elf_Sym_Impl Elf_Sym; + typedef Elf_Shdr_Impl Elf_Shdr; + typedef Elf_Sym_Impl Elf_Sym; typedef - Elf_Rel_Impl Elf_Rel; + Elf_Rel_Impl Elf_Rel; typedef - Elf_Rel_Impl Elf_Rela; + Elf_Rel_Impl Elf_Rela; - typedef Elf_Ehdr_Impl Elf_Ehdr; + typedef Elf_Ehdr_Impl Elf_Ehdr; typedef typename ELFDataTypeTypedefHelper< - target_endianness, max_alignment, is64Bits>::value_type addr_type; + ELFT>::value_type addr_type; public: DyldELFObject(MemoryBuffer *Wrapper, error_code &ec); @@ -65,25 +63,25 @@ public: // Methods for type inquiry through isa, cast and dyn_cast static inline bool classof(const Binary *v) { - return (isa >(v) + return (isa >(v) && classof(cast >(v))); + >(v))); } static inline bool classof( - const ELFObjectFile *v) { + const ELFObjectFile *v) { return v->isDyldType(); } }; -template +template class ELFObjectImage : public ObjectImageCommon { protected: - DyldELFObject *DyldObj; + DyldELFObject *DyldObj; bool Registered; public: ELFObjectImage(ObjectBuffer *Input, - DyldELFObject *Obj) + DyldELFObject *Obj) : ObjectImageCommon(Input, Obj), DyldObj(Obj), Registered(false) {} @@ -119,16 +117,15 @@ class ELFObjectImage : public ObjectImageCommon { // The MemoryBuffer passed into this constructor is just a wrapper around the // actual memory. Ultimately, the Binary parent class will take ownership of // this MemoryBuffer object but not the underlying memory. -template -DyldELFObject - ::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec) - : ELFObjectFile(Wrapper, ec) { +template +DyldELFObject::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec) + : ELFObjectFile(Wrapper, ec) { this->isDyldELFObject = true; } -template -void DyldELFObject - ::updateSectionAddress(const SectionRef &Sec, uint64_t Addr) { +template +void DyldELFObject::updateSectionAddress(const SectionRef &Sec, + uint64_t Addr) { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); @@ -138,13 +135,12 @@ void DyldELFObject shdr->sh_addr = static_cast(Addr); } -template -void DyldELFObject - ::updateSymbolAddress(const SymbolRef &SymRef, uint64_t Addr){ +template +void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, + uint64_t Addr) { Elf_Sym *sym = const_cast( - ELFObjectFile - ::getSymbol(SymRef.getRawDataRefImpl())); + ELFObjectFile::getSymbol(SymRef.getRawDataRefImpl())); // This assumes the address passed in matches the target address bitness // The template-based type cast handles everything else. 
@@ -164,24 +160,28 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { error_code ec; if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else llvm_unreachable("Unexpected ELF format"); diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 2c8c1b1..160053d 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -28,30 +28,30 @@ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) if (MaxAlignment >= 4) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else if (MaxAlignment >= 2) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) if (MaxAlignment >= 4) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else if (MaxAlignment >= 2) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) if (MaxAlignment >= 8) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else if (MaxAlignment >= 2) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { if (MaxAlignment >= 8) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else if (MaxAlignment >= 2) - return new ELFObjectFile(Object, ec); + return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); } -- cgit v1.1 From 344d3fb9614a103f84520fd88870121abe94c080 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 15 Jan 2013 16:43:00 +0000 Subject: Allow vectors in CreatePointerCast. Both underlying IR operations support vectors of pointers already. 
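As a self-contained sketch of the relaxed type check (TypeInfo and choosePointerCast are illustrative stand-ins, not the llvm::Type / CastInst API):

#include <cassert>

enum CastOp { PtrToInt, BitCast };

struct TypeInfo {
  bool PtrOrPtrVector; // a pointer, or a vector of pointers
  bool IntOrIntVector; // an integer, or a vector of integers
};

// Mirrors the updated asserts: the source may now be a vector of pointers,
// and an integer (or integer-vector) destination selects ptrtoint rather
// than bitcast.
CastOp choosePointerCast(const TypeInfo &Src, const TypeInfo &Dst) {
  assert(Src.PtrOrPtrVector && "Invalid cast");
  assert((Dst.IntOrIntVector || Dst.PtrOrPtrVector) && "Invalid cast");
  return Dst.IntOrIntVector ? PtrToInt : BitCast;
}

int main() {
  TypeInfo VecOfPtrs = { true, false };
  TypeInfo VecOfInts = { false, true };
  return choosePointerCast(VecOfPtrs, VecOfInts) == PtrToInt ? 0 : 1;
}

Under the old scalar-only asserts a vector-of-pointers source would have failed outright; now, say, a <4 x i8*> value cast to <4 x i64> simply becomes a ptrtoint.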
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172538 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Instructions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 26398ce..aba0fc9 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -2386,11 +2386,11 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(S->getType()->isPointerTy() && "Invalid cast"); - assert((Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); + assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) && "Invalid cast"); - if (Ty->isIntegerTy()) + if (Ty->isIntOrIntVectorTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } -- cgit v1.1 From 967a946cb463c8b137c6e040f62100efebad93b1 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 15 Jan 2013 16:44:52 +0000 Subject: [msan] Fix handling of equality comparison of pointer vectors. Also improve test coveration of the handling of relational comparisons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172539 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 76da970..db0de4d 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -574,7 +574,7 @@ struct MemorySanitizerVisitor : public InstVisitor { if (IntegerType *IT = dyn_cast(OrigTy)) return IT; if (VectorType *VT = dyn_cast(OrigTy)) { - uint32_t EltSize = MS.TD->getTypeStoreSizeInBits(VT->getElementType()); + uint32_t EltSize = MS.TD->getTypeSizeInBits(VT->getElementType()); return VectorType::get(IntegerType::get(*MS.C, EltSize), VT->getNumElements()); } @@ -586,7 +586,7 @@ struct MemorySanitizerVisitor : public InstVisitor { DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } - uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy); + uint32_t TypeSize = MS.TD->getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -1127,10 +1127,13 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *B = I.getOperand(1); Value *Sa = getShadow(A); Value *Sb = getShadow(B); - if (A->getType()->isPointerTy()) - A = IRB.CreatePointerCast(A, MS.IntptrTy); - if (B->getType()->isPointerTy()) - B = IRB.CreatePointerCast(B, MS.IntptrTy); + + // Get rid of pointers and vectors of pointers. + // For ints (and vectors of ints), types of A and Sa match, + // and this is a no-op. + A = IRB.CreatePointerCast(A, Sa->getType()); + B = IRB.CreatePointerCast(B, Sb->getType()); + // A == B <==> (C = A^B) == 0 // A != B <==> (C = A^B) != 0 // Sc = Sa | Sb -- cgit v1.1 From 6a459e65c8bd90e8381361379255ebfb9552b5cf Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 15 Jan 2013 18:25:16 +0000 Subject: LoopVectorizer cost model. Honor the user command line flag that selects the vectorization factor even if the target machine does not have any vector registers. 
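A tiny standalone sketch of the behavioral difference (selectFactor and UserVF are invented stand-ins; the real logic lives in LoopVectorizationCostModel::selectVectorizationFactor, shown in the one-line diff below):

#include <cstdio>

unsigned selectFactor(unsigned MaxVectorSize, unsigned UserVF) {
  if (MaxVectorSize == 0) {
    // Before: return 1;   // the user flag below was never consulted
    MaxVectorSize = 1;      // after: clamp and fall through
  }
  if (UserVF != 0)
    return UserVF;          // honor the command line flag
  return MaxVectorSize;     // otherwise use the computed width
}

int main() {
  // With no vector registers (MaxVectorSize == 0) but a user-forced factor
  // of 4, the fixed version returns 4 instead of 1.
  std::printf("%u\n", selectFactor(0, 4));
  return 0;
}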
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172544 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 464ed97..bc8e121 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2633,7 +2633,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, if (MaxVectorSize == 0) { DEBUG(dbgs() << "LV: The target has no vector registers.\n"); - return 1; + MaxVectorSize = 1; } assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements" -- cgit v1.1 From 12bfff440700c58bc67e9f9d8f99121632022cf1 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 15 Jan 2013 20:52:06 +0000 Subject: [IR] Add verification for module flags with the "require" behavior. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5a4a5a7..2488a7d 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -243,7 +243,8 @@ namespace { void visitNamedMDNode(NamedMDNode &NMD); void visitMDNode(MDNode &MD, Function *F); void visitModuleFlags(Module &M); - void visitModuleFlag(MDNode *Op, SmallSetVector &SeenIDs); + void visitModuleFlag(MDNode *Op, DenseMap &SeenIDs, + SmallVectorImpl &Requirements); void visitFunction(Function &F); void visitBasicBlock(BasicBlock &BB); using InstVisitor::visit; @@ -529,15 +530,37 @@ void Verifier::visitModuleFlags(Module &M) { const NamedMDNode *Flags = M.getModuleFlagsMetadata(); if (!Flags) return; - // Scan each flag. - SmallSetVector SeenIDs; + // Scan each flag, and track the flags and requirements. + DenseMap SeenIDs; + SmallVector Requirements; for (unsigned I = 0, E = Flags->getNumOperands(); I != E; ++I) { - visitModuleFlag(Flags->getOperand(I), SeenIDs); + visitModuleFlag(Flags->getOperand(I), SeenIDs, Requirements); + } + + // Validate that the requirements in the module are valid. + for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { + MDNode *Requirement = Requirements[I]; + MDString *Flag = cast(Requirement->getOperand(0)); + Value *ReqValue = Requirement->getOperand(1); + + MDNode *Op = SeenIDs.lookup(Flag); + if (!Op) { + CheckFailed("invalid requirement on flag, flag is not present in module", + Flag); + continue; + } + + if (Op->getOperand(2) != ReqValue) { + CheckFailed(("invalid requirement on flag, " + "flag does not have the required value"), + Flag); + continue; + } } } -void Verifier::visitModuleFlag(MDNode *Op, - SmallSetVector &SeenIDs) { +void Verifier::visitModuleFlag(MDNode *Op, DenseMap&SeenIDs, + SmallVectorImpl &Requirements) { // Each module flag should have three arguments, the merge behavior (a // constant int), the flag ID (an MDString), and the value. Assert1(Op->getNumOperands() == 3, @@ -558,7 +581,8 @@ void Verifier::visitModuleFlag(MDNode *Op, // Unless this is a "requires" flag, check the ID is unique. 
if (BehaviorValue != Module::Require) { - Assert1(SeenIDs.insert(ID), + bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second; + Assert1(Inserted, "module flag identifiers must be unique (or of 'require' type)", ID); } @@ -575,6 +599,10 @@ void Verifier::visitModuleFlag(MDNode *Op, ("invalid value for 'require' module flag " "(first value operand should be a string)"), Value->getOperand(0)); + + // Append it to the list of requirements, to check once all module flags are + // scanned. + Requirements.push_back(Value); } } -- cgit v1.1 From 3d69041abe8a9833e78f645f0d4d7b95b802e3c4 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 15 Jan 2013 20:52:09 +0000 Subject: [Linker] Drop asserts that are embedded in cast<> and now checked by the verifier. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172550 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'lib') diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 41ec114..e34dbcb 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1001,19 +1001,10 @@ categorizeModuleFlagNodes(const NamedMDNode *ModFlags, for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { MDNode *Op = ModFlags->getOperand(I); - assert(Op->getNumOperands() == 3 && "Invalid module flag metadata!"); - assert(isa(Op->getOperand(0)) && - "Module flag's first operand must be an integer!"); - assert(isa(Op->getOperand(1)) && - "Module flag's second operand must be an MDString!"); - ConstantInt *Behavior = cast(Op->getOperand(0)); MDString *ID = cast(Op->getOperand(1)); Value *Val = Op->getOperand(2); switch (Behavior->getZExtValue()) { - default: - assert(false && "Invalid behavior in module flag metadata!"); - break; case Module::Error: { MDNode *&ErrNode = ErrorNode[ID]; if (!ErrNode) ErrNode = Op; @@ -1126,8 +1117,6 @@ bool ModuleLinker::linkModuleFlagsMetadata() { for (SmallSetVector::iterator II = Set.begin(), IE = Set.end(); II != IE; ++II) { MDNode *Node = *II; - assert(isa(Node->getOperand(2)) && - "Module flag's third operand must be an MDNode!"); MDNode *Val = cast(Node->getOperand(2)); MDString *ReqID = cast(Val->getOperand(0)); -- cgit v1.1 From a1444219b271cab6fbfe340c1328b0ab10d8f7b6 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Tue, 15 Jan 2013 21:09:32 +0000 Subject: 1. Hoist the minus sign as high as possible in an attempt to reveal some optimization opportunities (in the enclosing super-expressions). rule 1. (-0.0 - X ) * Y => -0.0 - (X * Y) if expression "-0.0 - X" has only one reference. rule 2. (0.0 - X ) * Y => -0.0 - (X * Y) if expression "0.0 - X" has only one reference, and the instruction is marked "noSignedZero". 2. Eliminate negation (the compiler was already able to handle these optimizations if the 0.0s are replaced with -0.0.) rule 3: (0.0 - X) * (0.0 - Y) => X * Y rule 4: (0.0 - X) * C => X * -C if the expr is flagged "noSignedZero". 3. Rule 5: (X*Y) * X => (X*X) * Y if X!=Y and the expression is flagged with "UnsafeAlgebra". The purpose of this transformation is two-fold: a) to form a power expression (of X). b) to potentially shorten the critical path: After transformation, the latency of the instruction Y is amortized by the expression of X*X, and therefore Y is in a "less critical" position compared to what it was before the transformation. 4. Remove the InstCombine code about simplifying "X * select".
The reasons are the following: a) The "select" is somewhat architecture-dependent; therefore, the higher-level optimizers are not able to precisely predict whether the simplification really yields any performance improvement. b) The "select" operator is a bit complicated, and tends to obscure optimization opportunities. It is better to keep it as low as possible in the expression tree, and let CodeGen tackle the optimization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172551 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineMulDivRem.cpp | 91 ++++++++++++++-------- 1 file changed, 60 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 29846c1..8e4267f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -377,6 +377,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD)) return ReplaceInstUsesWith(I, V); + bool AllowReassociate = I.hasUnsafeAlgebra(); + // Simplify mul instructions with a constant RHS. if (isa(Op1)) { // Try to fold constant mul into select arguments. @@ -389,7 +391,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return NV; ConstantFP *C = dyn_cast(Op1); - if (C && I.hasUnsafeAlgebra() && C->getValueAPF().isNormal()) { + if (C && AllowReassociate && C->getValueAPF().isNormal()) { // Let MDC denote an expression in one of these forms: // X * C, C/X, X/C, where C is a constant. // @@ -430,7 +432,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { BinaryOperator::CreateFAdd(M0, M1) : BinaryOperator::CreateFSub(M0, M1); Instruction *RI = cast(R); - RI->setHasUnsafeAlgebra(true); + RI->copyFastMathFlags(&I); return RI; } } @@ -438,9 +440,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } - if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFMul(Op0v, Op1v); // Under unsafe algebra do: // X * log2(0.5*Y) = X*log2(Y) - X @@ -469,36 +468,66 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } - // X * cond ? 1.0 : 0.0 => cond ?
X : 0.0 - if (I.hasNoNaNs() && I.hasNoSignedZeros()) { - Value *V0 = I.getOperand(0); - Value *V1 = I.getOperand(1); - Value *Cond, *SLHS, *SRHS; - bool Match = false; - - if (match(V0, m_Select(m_Value(Cond), m_Value(SLHS), m_Value(SRHS)))) { - Match = true; - } else if (match(V1, m_Select(m_Value(Cond), m_Value(SLHS), - m_Value(SRHS)))) { - Match = true; - std::swap(V0, V1); + // Handle symmetric situation in a 2-iteration loop + Value *Opnd0 = Op0; + Value *Opnd1 = Op1; + for (int i = 0; i < 2; i++) { + bool IgnoreZeroSign = I.hasNoSignedZeros(); + if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { + Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); + Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); + + // -X * -Y => X*Y + if (N1) + return BinaryOperator::CreateFMul(N0, N1); + + if (Opnd0->hasOneUse()) { + // -X * Y => -(X*Y) (Promote negation as high as possible) + Value *T = Builder->CreateFMul(N0, Opnd1); + cast(T)->setDebugLoc(I.getDebugLoc()); + Instruction *Neg = BinaryOperator::CreateFNeg(T); + if (I.getFastMathFlags().any()) { + cast(T)->copyFastMathFlags(&I); + Neg->copyFastMathFlags(&I); + } + return Neg; + } } - if (Match) { - ConstantFP *C0 = dyn_cast(SLHS); - ConstantFP *C1 = dyn_cast(SRHS); - - if (C0 && C1 && - ((C0->isZero() && C1->isExactlyValue(1.0)) || - (C1->isZero() && C0->isExactlyValue(1.0)))) { - Value *T; - if (C0->isZero()) - T = Builder->CreateSelect(Cond, SLHS, V1); - else - T = Builder->CreateSelect(Cond, V1, SRHS); - return ReplaceInstUsesWith(I, T); + // (X*Y) * X => (X*X) * Y where Y != X + // The purpose is two-fold: + // 1) to form a power expression (of X). + // 2) potentially shorten the critical path: After transformation, the + // latency of the instruction Y is amortized by the expression of X*X, + // and therefore Y is in a "less critical" position compared to what it + // was before the transformation. + // + if (AllowReassociate) { + Value *Opnd0_0, *Opnd0_1; + if (Opnd0->hasOneUse() && + match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { + Value *Y = 0; + if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) + Y = Opnd0_1; + else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) + Y = Opnd0_0; + + if (Y) { + Instruction *T = cast(Builder->CreateFMul(Opnd1, Opnd1)); + T->copyFastMathFlags(&I); + T->setDebugLoc(I.getDebugLoc()); + + Instruction *R = BinaryOperator::CreateFMul(T, Y); + R->copyFastMathFlags(&I); + return R; + } } } + + if (!isa(Op1)) + std::swap(Opnd0, Opnd1); + else + break; } return Changed ? &I : 0; -- cgit v1.1 From 6ee130893072977aa70e8ae6470e88171e782e46 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 15 Jan 2013 22:59:42 +0000 Subject: Refactor generic Asm directive parsing. After discussing the refactoring with Jim and Daniel, the following changes were made: * All generic directive parsing is now done by AsmParser itself. The previous division between it and GenericAsmParser did not have clear boundaries and just produced unnatural code, with GenericAsmParser juggling the internals of AsmParser through an interface. The division of responsibilities is now clear: target-specific directives, other extensions (used by platform-specific parsers), and generic directives. * Priority for directive parsing was reshuffled to ask extensions first and check the generic directives later. No change in functionality.
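As a rough sketch of the resulting dispatch order (simplified stand-in types, not the real MC interfaces; the actual code is in AsmParser::ParseStatement in the diff below):

#include <map>
#include <string>

// Stand-in handler type; the real code uses member-function pointers on
// MCAsmParserExtension.
typedef bool (*ExtHandler)(const std::string &Directive);

struct DispatcherSketch {
  std::map<std::string, ExtHandler> ExtensionDirectiveMap; // extensions register here
  std::map<std::string, int> DirectiveKindMap;             // generic directives

  bool parseDirective(const std::string &IDVal) {
    // 1. The target-specific parser gets first refusal; by convention it
    //    returns 'true' when it is not interested in the directive.
    if (!targetParseDirective(IDVal))
      return false;
    // 2. Extensions (e.g. platform parsers) that registered this directive.
    std::map<std::string, ExtHandler>::iterator I =
        ExtensionDirectiveMap.find(IDVal);
    if (I != ExtensionDirectiveMap.end())
      return I->second(IDVal);
    // 3. Otherwise it must be one of the generic directives.
    std::map<std::string, int>::iterator K = DirectiveKindMap.find(IDVal);
    if (K != DirectiveKindMap.end())
      return parseGenericDirective(K->second);
    return true; // unknown directive -> error
  }

  bool targetParseDirective(const std::string &) { return true; }
  bool parseGenericDirective(int) { return false; }
};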
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 2080 +++++++++++++++++++++-------------------- 1 file changed, 1046 insertions(+), 1034 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 66d3bc7..ce5ce1d 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -50,7 +50,6 @@ MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {} namespace { - /// \brief Helper class for storing information about an active macro /// instantiation. struct MacroInstantiation { @@ -111,7 +110,6 @@ private: SourceMgr &SrcMgr; SourceMgr::DiagHandlerTy SavedDiagHandler; void *SavedDiagContext; - MCAsmParserExtension *GenericParser; MCAsmParserExtension *PlatformParser; /// This is the current buffer index we're lexing from as managed by the @@ -121,11 +119,12 @@ private: AsmCond TheCondState; std::vector TheCondStack; - /// DirectiveMap - This is a table handlers for directives. Each handler is - /// invoked after the directive identifier is read and is responsible for - /// parsing and validating the rest of the directive. The handler is passed - /// in the directive name and the location of the directive keyword. - StringMap > DirectiveMap; + /// ExtensionDirectiveMap - maps directive names to handler methods in parser + /// extensions. Extensions register themselves in this map by calling + /// AddDirectiveHandler. + typedef std::pair + ExtensionDirectiveHandler; + StringMap ExtensionDirectiveMap; /// MacroMap - Map of currently defined macros. StringMap MacroMap; @@ -164,7 +163,7 @@ public: virtual void AddDirectiveHandler(MCAsmParserExtension *Object, StringRef Directive, DirectiveHandler Handler) { - DirectiveMap[Directive] = std::make_pair(Object, Handler); + ExtensionDirectiveMap[Directive] = std::make_pair(Object, Handler); } public: @@ -282,8 +281,9 @@ private: bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); - // Directive Parsing. + bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc); + // Generic (target and platform independent) directive parsing. enum DirectiveKind { DK_NO_DIRECTIVE, // Placeholder DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT, @@ -297,10 +297,21 @@ private: DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, DK_IF, DK_IFB, DK_IFNB, DK_IFC, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, - DK_ELSEIF, DK_ELSE, DK_ENDIF + DK_ELSEIF, DK_ELSE, DK_ENDIF, + DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS, + DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA, + DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, + DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA, + DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE, + DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED, + DK_CFI_REGISTER, + DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM, + DK_SLEB128, DK_ULEB128 }; - StringMap DirectiveKindMapping; + /// DirectiveKindMap - Maps directive name --> DirectiveKind enum, for + /// directives parsed by this class. 
+ StringMap DirectiveKindMap; // ".ascii", ".asciz", ".string" bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); @@ -314,6 +325,38 @@ private: // ".align{,32}", ".p2align{,w,l}" bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + // ".file", ".line", ".loc", ".stabs" + bool ParseDirectiveFile(SMLoc DirectiveLoc); + bool ParseDirectiveLine(); + bool ParseDirectiveLoc(); + bool ParseDirectiveStabs(); + + // .cfi directives + bool ParseDirectiveCFIRegister(SMLoc DirectiveLoc); + bool ParseDirectiveCFISections(); + bool ParseDirectiveCFIStartProc(); + bool ParseDirectiveCFIEndProc(); + bool ParseDirectiveCFIDefCfaOffset(); + bool ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc); + bool ParseDirectiveCFIAdjustCfaOffset(); + bool ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc); + bool ParseDirectiveCFIOffset(SMLoc DirectiveLoc); + bool ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc); + bool ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality); + bool ParseDirectiveCFIRememberState(); + bool ParseDirectiveCFIRestoreState(); + bool ParseDirectiveCFISameValue(SMLoc DirectiveLoc); + bool ParseDirectiveCFIRestore(SMLoc DirectiveLoc); + bool ParseDirectiveCFIEscape(); + bool ParseDirectiveCFISignalFrame(); + bool ParseDirectiveCFIUndefined(SMLoc DirectiveLoc); + + // macro directives + bool ParseDirectivePurgeMacro(SMLoc DirectiveLoc); + bool ParseDirectiveEndMacro(StringRef Directive); + bool ParseDirectiveMacro(SMLoc DirectiveLoc); + bool ParseDirectiveMacrosOnOff(StringRef Directive); + // ".bundle_align_mode" bool ParseDirectiveBundleAlignMode(); // ".bundle_lock" @@ -321,6 +364,12 @@ private: // ".bundle_unlock" bool ParseDirectiveBundleUnlock(); + // ".space", ".skip" + bool ParseDirectiveSpace(StringRef IDVal); + + // .sleb128 (Signed=true) and .uleb128 (Signed=false) + bool ParseDirectiveLEB128(bool Signed); + /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which /// accepts a single symbol (which should be a label or an external). bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr); @@ -361,121 +410,8 @@ private: // "_emit" bool ParseDirectiveEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info); - void initializeDirectiveKindMapping(); -}; - -/// \brief Generic implementation of directive handling, etc. which is shared -/// (or the default, at least) for all assembler parsers. -class GenericAsmParser : public MCAsmParserExtension { - template - void AddDirectiveHandler(StringRef Directive) { - getParser().AddDirectiveHandler(this, Directive, - HandleDirective); - } -public: - GenericAsmParser() {} - - virtual void Initialize(MCAsmParser &Parser) { - // Call the base implementation. - this->MCAsmParserExtension::Initialize(Parser); - - // Debugging directives. - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs"); - - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveSpace>(".space"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveSpace>(".skip"); - - // CFI directives. 
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFISections>( - ".cfi_sections"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>( - ".cfi_startproc"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>( - ".cfi_endproc"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>( - ".cfi_def_cfa"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>( - ".cfi_def_cfa_offset"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>( - ".cfi_adjust_cfa_offset"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>( - ".cfi_def_cfa_register"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>( - ".cfi_offset"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>( - ".cfi_rel_offset"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIRestore>(".cfi_restore"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIEscape>(".cfi_escape"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFISignalFrame>(".cfi_signal_frame"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIUndefined>(".cfi_undefined"); - AddDirectiveHandler< - &GenericAsmParser::ParseDirectiveCFIRegister>(".cfi_register"); - - // Macro directives. 
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( - ".macros_on"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( - ".macros_off"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectivePurgeMacro>(".purgem"); - - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128"); - AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128"); - } - - bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc); - - bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveSpace(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFISections(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIRestore(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIEscape(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFISignalFrame(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIUndefined(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveCFIRegister(StringRef, SMLoc DirectiveLoc); - - bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc); - bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc); - bool ParseDirectivePurgeMacro(StringRef, SMLoc DirectiveLoc); - - bool ParseDirectiveLEB128(StringRef, SMLoc); + void initializeDirectiveKindMap(); }; - } namespace llvm { @@ -491,7 +427,7 @@ enum { DEFAULT_ADDRSPACE = 0 }; AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), - GenericParser(new GenericAsmParser), PlatformParser(0), + PlatformParser(0), CurBuffer(0), MacrosEnabledFlag(true), CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) { // Save the old handler. @@ -501,9 +437,6 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, SrcMgr.setDiagHandler(DiagHandler, this); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); - // Initialize the generic parser. - GenericParser->Initialize(*this); - // Initialize the platform / file format parser. 
// // FIXME: This is a hack, we need to (majorly) cleanup how these objects are @@ -520,7 +453,7 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, PlatformParser->Initialize(*this); } - initializeDirectiveKindMapping(); + initializeDirectiveKindMap(); } AsmParser::~AsmParser() { @@ -532,7 +465,6 @@ AsmParser::~AsmParser() { delete it->getValue(); delete PlatformParser; - delete GenericParser; } void AsmParser::PrintMacroInstantiations() { @@ -1169,10 +1101,10 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // have to do this so that .endif isn't skipped in a ".if 0" block for // example. StringMap::const_iterator DirKindIt = - DirectiveKindMapping.find(IDVal); + DirectiveKindMap.find(IDVal); DirectiveKind DirKind = - (DirKindIt == DirectiveKindMapping.end()) ? DK_NO_DIRECTIVE : - DirKindIt->getValue(); + (DirKindIt == DirectiveKindMap.end()) ? DK_NO_DIRECTIVE : + DirKindIt->getValue(); switch (DirKind) { default: break; @@ -1270,12 +1202,34 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { } // Otherwise, we have a normal instruction or directive. + + // Directives start with "." if (IDVal[0] == '.' && IDVal != ".") { - - // Target hook for parsing target specific directives. + // There are several entities interested in parsing directives: + // + // 1. The target-specific assembly parser. Some directives are target + // specific or may potentially behave differently on certain targets. + // 2. Asm parser extensions. For example, platform-specific parsers + // (like the ELF parser) register themselves as extensions. + // 3. The generic directive parser implemented by this class. These are + // all the directives that behave in a target and platform independent + // manner, or at least have a default behavior that's shared between + // all targets and platforms. + + // First query the target-specific parser. It will return 'true' if it + // isn't interested in this directive. if (!getTargetParser().ParseDirective(ID)) return false; + // Next, check the extention directive map to see if any extension has + // registered itself to parse this directive. + std::pair Handler = + ExtensionDirectiveMap.lookup(IDVal); + if (Handler.first) + return (*Handler.second)(Handler.first, IDVal, IDLoc); + + // Finally, if no one else is interested in this directive, it must be + // generic and familiar to this class. 
switch (DirKind) { default: break; @@ -1385,14 +1339,71 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return ParseDirectiveBundleLock(); case DK_BUNDLE_UNLOCK: return ParseDirectiveBundleUnlock(); + case DK_SLEB128: + return ParseDirectiveLEB128(true); + case DK_ULEB128: + return ParseDirectiveLEB128(false); + case DK_SPACE: + case DK_SKIP: + return ParseDirectiveSpace(IDVal); + case DK_FILE: + return ParseDirectiveFile(IDLoc); + case DK_LINE: + return ParseDirectiveLine(); + case DK_LOC: + return ParseDirectiveLoc(); + case DK_STABS: + return ParseDirectiveStabs(); + case DK_CFI_SECTIONS: + return ParseDirectiveCFISections(); + case DK_CFI_STARTPROC: + return ParseDirectiveCFIStartProc(); + case DK_CFI_ENDPROC: + return ParseDirectiveCFIEndProc(); + case DK_CFI_DEF_CFA: + return ParseDirectiveCFIDefCfa(IDLoc); + case DK_CFI_DEF_CFA_OFFSET: + return ParseDirectiveCFIDefCfaOffset(); + case DK_CFI_ADJUST_CFA_OFFSET: + return ParseDirectiveCFIAdjustCfaOffset(); + case DK_CFI_DEF_CFA_REGISTER: + return ParseDirectiveCFIDefCfaRegister(IDLoc); + case DK_CFI_OFFSET: + return ParseDirectiveCFIOffset(IDLoc); + case DK_CFI_REL_OFFSET: + return ParseDirectiveCFIRelOffset(IDLoc); + case DK_CFI_PERSONALITY: + return ParseDirectiveCFIPersonalityOrLsda(true); + case DK_CFI_LSDA: + return ParseDirectiveCFIPersonalityOrLsda(false); + case DK_CFI_REMEMBER_STATE: + return ParseDirectiveCFIRememberState(); + case DK_CFI_RESTORE_STATE: + return ParseDirectiveCFIRestoreState(); + case DK_CFI_SAME_VALUE: + return ParseDirectiveCFISameValue(IDLoc); + case DK_CFI_RESTORE: + return ParseDirectiveCFIRestore(IDLoc); + case DK_CFI_ESCAPE: + return ParseDirectiveCFIEscape(); + case DK_CFI_SIGNAL_FRAME: + return ParseDirectiveCFISignalFrame(); + case DK_CFI_UNDEFINED: + return ParseDirectiveCFIUndefined(IDLoc); + case DK_CFI_REGISTER: + return ParseDirectiveCFIRegister(IDLoc); + case DK_MACROS_ON: + case DK_MACROS_OFF: + return ParseDirectiveMacrosOnOff(IDVal); + case DK_MACRO: + return ParseDirectiveMacro(IDLoc); + case DK_ENDM: + case DK_ENDMACRO: + return ParseDirectiveEndMacro(IDVal); + case DK_PURGEM: + return ParseDirectivePurgeMacro(IDLoc); } - // Look up the handler in the extension handler table. - std::pair Handler = - DirectiveMap.lookup(IDVal); - if (Handler.first) - return (*Handler.second)(Handler.first, IDVal, IDLoc); - return Error(IDLoc, "unknown directive"); } @@ -2429,1200 +2440,1201 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { return false; } +/// ParseDirectiveFile +/// ::= .file [number] filename +/// ::= .file number directory filename +bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) { + // FIXME: I'm not sure what this is. + int64_t FileNumber = -1; + SMLoc FileNumberLoc = getLexer().getLoc(); + if (getLexer().is(AsmToken::Integer)) { + FileNumber = getTok().getIntVal(); + Lex(); -/// ParseDirectiveBundleAlignMode -/// ::= {.bundle_align_mode} expression -bool AsmParser::ParseDirectiveBundleAlignMode() { - CheckForValidSection(); + if (FileNumber < 1) + return TokError("file number less than one"); + } - // Expect a single argument: an expression that evaluates to a constant - // in the inclusive range 0-30. 
- SMLoc ExprLoc = getLexer().getLoc(); - int64_t AlignSizePow2; - if (ParseAbsoluteExpression(AlignSizePow2)) - return true; - else if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token after expression in" - " '.bundle_align_mode' directive"); - else if (AlignSizePow2 < 0 || AlignSizePow2 > 30) - return Error(ExprLoc, - "invalid bundle alignment size (expected between 0 and 30)"); + if (getLexer().isNot(AsmToken::String)) + return TokError("unexpected token in '.file' directive"); + // Usually the directory and filename together, otherwise just the directory. + StringRef Path = getTok().getString(); + Path = Path.substr(1, Path.size()-2); Lex(); - // Because of AlignSizePow2's verified range we can safely truncate it to - // unsigned. - getStreamer().EmitBundleAlignMode(static_cast(AlignSizePow2)); + StringRef Directory; + StringRef Filename; + if (getLexer().is(AsmToken::String)) { + if (FileNumber == -1) + return TokError("explicit path specified, but no file number"); + Filename = getTok().getString(); + Filename = Filename.substr(1, Filename.size()-2); + Directory = Path; + Lex(); + } else { + Filename = Path; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + if (FileNumber == -1) + getStreamer().EmitFileDirective(Filename); + else { + if (getContext().getGenDwarfForAssembly() == true) + Error(DirectiveLoc, "input can't have .file dwarf directives when -g is " + "used to generate dwarf debug info for assembly code"); + + if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename)) + Error(FileNumberLoc, "file number already allocated"); + } + return false; } -/// ParseDirectiveBundleLock -/// ::= {.bundle_lock} [align_to_end] -bool AsmParser::ParseDirectiveBundleLock() { - CheckForValidSection(); - bool AlignToEnd = false; - +/// ParseDirectiveLine +/// ::= .line [number] +bool AsmParser::ParseDirectiveLine() { if (getLexer().isNot(AsmToken::EndOfStatement)) { - StringRef Option; - SMLoc Loc = getTok().getLoc(); - const char *kInvalidOptionError = - "invalid option for '.bundle_lock' directive"; + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.line' directive"); - if (ParseIdentifier(Option)) - return Error(Loc, kInvalidOptionError); + int64_t LineNumber = getTok().getIntVal(); + (void) LineNumber; + Lex(); - if (Option != "align_to_end") - return Error(Loc, kInvalidOptionError); - else if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(Loc, - "unexpected token after '.bundle_lock' directive option"); - AlignToEnd = true; + // FIXME: Do something with the .line. } - Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.line' directive"); - getStreamer().EmitBundleLock(AlignToEnd); return false; } -/// ParseDirectiveBundleLock -/// ::= {.bundle_lock} -bool AsmParser::ParseDirectiveBundleUnlock() { - CheckForValidSection(); - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.bundle_unlock' directive"); +/// ParseDirectiveLoc +/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end] +/// [epilogue_begin] [is_stmt VALUE] [isa VALUE] +/// The first number is a file number, must have been previously assigned with +/// a .file directive, the second number is the line number and optionally the +/// third number is a column position (zero if not specified). The remaining +/// optional items are .loc sub-directives. 
+bool AsmParser::ParseDirectiveLoc() { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + int64_t FileNumber = getTok().getIntVal(); + if (FileNumber < 1) + return TokError("file number less than one in '.loc' directive"); + if (!getContext().isValidDwarfFileNumber(FileNumber)) + return TokError("unassigned file number in '.loc' directive"); Lex(); - getStreamer().EmitBundleUnlock(); - return false; -} + int64_t LineNumber = 0; + if (getLexer().is(AsmToken::Integer)) { + LineNumber = getTok().getIntVal(); + if (LineNumber < 1) + return TokError("line number less than one in '.loc' directive"); + Lex(); + } -/// ParseDirectiveSymbolAttribute -/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] -bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { + int64_t ColumnPos = 0; + if (getLexer().is(AsmToken::Integer)) { + ColumnPos = getTok().getIntVal(); + if (ColumnPos < 0) + return TokError("column position less than zero in '.loc' directive"); + Lex(); + } + + unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0; + unsigned Isa = 0; + int64_t Discriminator = 0; if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { + if (getLexer().is(AsmToken::EndOfStatement)) + break; + StringRef Name; SMLoc Loc = getTok().getLoc(); - if (ParseIdentifier(Name)) - return Error(Loc, "expected identifier in directive"); - - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - - // Assembler local symbols don't make any sense here. Complain loudly. - if (Sym->isTemporary()) - return Error(Loc, "non-local symbol required in directive"); + return TokError("unexpected token in '.loc' directive"); - getStreamer().EmitSymbolAttribute(Sym, Attr); + if (Name == "basic_block") + Flags |= DWARF2_FLAG_BASIC_BLOCK; + else if (Name == "prologue_end") + Flags |= DWARF2_FLAG_PROLOGUE_END; + else if (Name == "epilogue_begin") + Flags |= DWARF2_FLAG_EPILOGUE_BEGIN; + else if (Name == "is_stmt") { + Loc = getTok().getLoc(); + const MCExpr *Value; + if (ParseExpression(Value)) + return true; + // The expression must be the constant 0 or 1. + if (const MCConstantExpr *MCE = dyn_cast(Value)) { + int Value = MCE->getValue(); + if (Value == 0) + Flags &= ~DWARF2_FLAG_IS_STMT; + else if (Value == 1) + Flags |= DWARF2_FLAG_IS_STMT; + else + return Error(Loc, "is_stmt value not 0 or 1"); + } + else { + return Error(Loc, "is_stmt value not the constant value of 0 or 1"); + } + } + else if (Name == "isa") { + Loc = getTok().getLoc(); + const MCExpr *Value; + if (ParseExpression(Value)) + return true; + // The expression must be a constant greater or equal to 0. 
+ if (const MCConstantExpr *MCE = dyn_cast(Value)) { + int Value = MCE->getValue(); + if (Value < 0) + return Error(Loc, "isa number less than zero"); + Isa = Value; + } + else { + return Error(Loc, "isa number not a constant value"); + } + } + else if (Name == "discriminator") { + if (ParseAbsoluteExpression(Discriminator)) + return true; + } + else { + return Error(Loc, "unknown sub-directive in '.loc' directive"); + } if (getLexer().is(AsmToken::EndOfStatement)) break; - - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); } } - Lex(); + getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags, + Isa, Discriminator, StringRef()); + return false; } -/// ParseDirectiveComm -/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] -bool AsmParser::ParseDirectiveComm(bool IsLocal) { - CheckForValidSection(); +/// ParseDirectiveStabs +/// ::= .stabs string, number, number, number +bool AsmParser::ParseDirectiveStabs() { + return TokError("unsupported directive '.stabs'"); +} - SMLoc IDLoc = getLexer().getLoc(); +/// ParseDirectiveCFISections +/// ::= .cfi_sections section [, section] +bool AsmParser::ParseDirectiveCFISections() { StringRef Name; + bool EH = false; + bool Debug = false; + if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("Expected an identifier"); - // Handle the identifier as the key symbol. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + if (Name == ".eh_frame") + EH = true; + else if (Name == ".debug_frame") + Debug = true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); + if (getLexer().is(AsmToken::Comma)) { + Lex(); - int64_t Size; - SMLoc SizeLoc = getLexer().getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (getLexer().is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = getLexer().getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - - LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType(); - if (IsLocal && LCOMM == LCOMM::NoAlignment) - return Error(Pow2AlignmentLoc, "alignment not supported on this target"); + if (ParseIdentifier(Name)) + return TokError("Expected an identifier"); - // If this target takes alignments in bytes (not log) validate and convert. - if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) || - (IsLocal && LCOMM == LCOMM::ByteAlignment)) { - if (!isPowerOf2_64(Pow2Alignment)) - return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); - Pow2Alignment = Log2_64(Pow2Alignment); - } + if (Name == ".eh_frame") + EH = true; + else if (Name == ".debug_frame") + Debug = true; } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.comm' or '.lcomm' directive"); - - Lex(); + getStreamer().EmitCFISections(EH, Debug); + return false; +} - // NOTE: a size of zero for a .comm should create a undefined symbol - // but a size of .lcomm creates a bss symbol of size zero. - if (Size < 0) - return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " - "be less than zero"); +/// ParseDirectiveCFIStartProc +/// ::= .cfi_startproc +bool AsmParser::ParseDirectiveCFIStartProc() { + getStreamer().EmitCFIStartProc(); + return false; +} - // NOTE: The alignment in the directive is a power of 2 value, the assembler - // may internally end up wanting an alignment in bytes. 
- // FIXME: Diagnose overflow. - if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " - "alignment, can't be less than zero"); +/// ParseDirectiveCFIEndProc +/// ::= .cfi_endproc +bool AsmParser::ParseDirectiveCFIEndProc() { + getStreamer().EmitCFIEndProc(); + return false; +} - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); +/// ParseRegisterOrRegisterNumber - parse register name or number. +bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register, + SMLoc DirectiveLoc) { + unsigned RegNo; - // Create the Symbol as a common or local common with Size and Pow2Alignment - if (IsLocal) { - getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment); - return false; - } + if (getLexer().isNot(AsmToken::Integer)) { + if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc)) + return true; + Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true); + } else + return ParseAbsoluteExpression(Register); - getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); return false; } -/// ParseDirectiveAbort -/// ::= .abort [... message ...] -bool AsmParser::ParseDirectiveAbort() { - // FIXME: Use loc from directive. - SMLoc Loc = getLexer().getLoc(); - - StringRef Str = ParseStringToEndOfStatement(); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.abort' directive"); +/// ParseDirectiveCFIDefCfa +/// ::= .cfi_def_cfa register, offset +bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { + int64_t Register = 0; + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); Lex(); - if (Str.empty()) - Error(Loc, ".abort detected. Assembly stopping."); - else - Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); - // FIXME: Actually abort assembly here. + int64_t Offset = 0; + if (ParseAbsoluteExpression(Offset)) + return true; + getStreamer().EmitCFIDefCfa(Register, Offset); return false; } -/// ParseDirectiveInclude -/// ::= .include "filename" -bool AsmParser::ParseDirectiveInclude() { - if (getLexer().isNot(AsmToken::String)) - return TokError("expected string in '.include' directive"); +/// ParseDirectiveCFIDefCfaOffset +/// ::= .cfi_def_cfa_offset offset +bool AsmParser::ParseDirectiveCFIDefCfaOffset() { + int64_t Offset = 0; + if (ParseAbsoluteExpression(Offset)) + return true; - std::string Filename = getTok().getString(); - SMLoc IncludeLoc = getLexer().getLoc(); - Lex(); + getStreamer().EmitCFIDefCfaOffset(Offset); + return false; +} - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.include' directive"); +/// ParseDirectiveCFIRegister +/// ::= .cfi_register register, register +bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) { + int64_t Register1 = 0; + if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc)) + return true; - // Strip the quotes. - Filename = Filename.substr(1, Filename.size()-2); + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); - // Attempt to switch the lexer to the included file before consuming the end - // of statement to avoid losing it when we switch. 
- if (EnterIncludeFile(Filename)) { - Error(IncludeLoc, "Could not find include file '" + Filename + "'"); + int64_t Register2 = 0; + if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc)) return true; - } + getStreamer().EmitCFIRegister(Register1, Register2); return false; } -/// ParseDirectiveIncbin -/// ::= .incbin "filename" -bool AsmParser::ParseDirectiveIncbin() { - if (getLexer().isNot(AsmToken::String)) - return TokError("expected string in '.incbin' directive"); - - std::string Filename = getTok().getString(); - SMLoc IncbinLoc = getLexer().getLoc(); - Lex(); - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.incbin' directive"); +/// ParseDirectiveCFIAdjustCfaOffset +/// ::= .cfi_adjust_cfa_offset adjustment +bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() { + int64_t Adjustment = 0; + if (ParseAbsoluteExpression(Adjustment)) + return true; - // Strip the quotes. - Filename = Filename.substr(1, Filename.size()-2); + getStreamer().EmitCFIAdjustCfaOffset(Adjustment); + return false; +} - // Attempt to process the included file. - if (ProcessIncbinFile(Filename)) { - Error(IncbinLoc, "Could not find incbin file '" + Filename + "'"); +/// ParseDirectiveCFIDefCfaRegister +/// ::= .cfi_def_cfa_register register +bool AsmParser::ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) { + int64_t Register = 0; + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; - } + getStreamer().EmitCFIDefCfaRegister(Register); return false; } -/// ParseDirectiveIf -/// ::= .if expression -bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { - TheCondStack.push_back(TheCondState); - TheCondState.TheCond = AsmCond::IfCond; - if (TheCondState.Ignore) { - EatToEndOfStatement(); - } else { - int64_t ExprValue; - if (ParseAbsoluteExpression(ExprValue)) - return true; +/// ParseDirectiveCFIOffset +/// ::= .cfi_offset register, offset +bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) { + int64_t Register = 0; + int64_t Offset = 0; - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.if' directive"); + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; - Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); - TheCondState.CondMet = ExprValue; - TheCondState.Ignore = !TheCondState.CondMet; - } + if (ParseAbsoluteExpression(Offset)) + return true; + getStreamer().EmitCFIOffset(Register, Offset); return false; } -/// ParseDirectiveIfb -/// ::= .ifb string -bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { - TheCondStack.push_back(TheCondState); - TheCondState.TheCond = AsmCond::IfCond; - - if (TheCondState.Ignore) { - EatToEndOfStatement(); - } else { - StringRef Str = ParseStringToEndOfStatement(); +/// ParseDirectiveCFIRelOffset +/// ::= .cfi_rel_offset register, offset +bool AsmParser::ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc) { + int64_t Register = 0; - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.ifb' directive"); + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; - Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); - TheCondState.CondMet = ExpectBlank == Str.empty(); - TheCondState.Ignore = !TheCondState.CondMet; - } + int64_t Offset = 0; + if (ParseAbsoluteExpression(Offset)) + return true; + getStreamer().EmitCFIRelOffset(Register, Offset); return false; } 
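For reference, the grammar productions in the doc comments above correspond to
assembler input along the following lines. This is a minimal, hypothetical
x86-64 sketch (the register names and offsets are illustrative only, not taken
from this patch or its tests):

        func:
                .cfi_startproc
                pushq   %rbp
                .cfi_def_cfa_offset 16        # .cfi_def_cfa_offset offset
                .cfi_offset %rbp, -16         # .cfi_offset register, offset
                movq    %rsp, %rbp
                .cfi_def_cfa_register %rbp    # .cfi_def_cfa_register register
                popq    %rbp
                .cfi_def_cfa %rsp, 8          # .cfi_def_cfa register, offset
                retq
                .cfi_endproc

Each register operand goes through ParseRegisterOrRegisterNumber, so either a
target register name or a raw DWARF register number is accepted, and each
directive is forwarded to the matching EmitCFI* streamer call.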
-/// ParseDirectiveIfc
-/// ::= .ifc string1, string2
-bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
-  TheCondStack.push_back(TheCondState);
-  TheCondState.TheCond = AsmCond::IfCond;
+static bool isValidEncoding(int64_t Encoding) {
+  if (Encoding & ~0xff)
+    return false;

-  if (TheCondState.Ignore) {
-    EatToEndOfStatement();
-  } else {
-    StringRef Str1 = ParseStringToComma();
+  if (Encoding == dwarf::DW_EH_PE_omit)
+    return true;

-    if (getLexer().isNot(AsmToken::Comma))
-      return TokError("unexpected token in '.ifc' directive");
+  const unsigned Format = Encoding & 0xf;
+  if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
+      Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
+      Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
+      Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
+    return false;

-    Lex();
+  const unsigned Application = Encoding & 0x70;
+  if (Application != dwarf::DW_EH_PE_absptr &&
+      Application != dwarf::DW_EH_PE_pcrel)
+    return false;

-    StringRef Str2 = ParseStringToEndOfStatement();
+  return true;
+}

-    if (getLexer().isNot(AsmToken::EndOfStatement))
-      return TokError("unexpected token in '.ifc' directive");
+/// ParseDirectiveCFIPersonalityOrLsda
+/// IsPersonality true for cfi_personality, false for cfi_lsda
+/// ::= .cfi_personality encoding, [symbol_name]
+/// ::= .cfi_lsda encoding, [symbol_name]
+bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
+  int64_t Encoding = 0;
+  if (ParseAbsoluteExpression(Encoding))
+    return true;
+  if (Encoding == dwarf::DW_EH_PE_omit)
+    return false;

-    Lex();
+  if (!isValidEncoding(Encoding))
+    return TokError("unsupported encoding.");

-    TheCondState.CondMet = ExpectEqual == (Str1 == Str2);
-    TheCondState.Ignore = !TheCondState.CondMet;
-  }
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();

+  StringRef Name;
+  if (ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (IsPersonality)
+    getStreamer().EmitCFIPersonality(Sym, Encoding);
+  else
+    getStreamer().EmitCFILsda(Sym, Encoding);
   return false;
 }

-/// ParseDirectiveIfdef
-/// ::= .ifdef symbol
-bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
-  StringRef Name;
-  TheCondStack.push_back(TheCondState);
-  TheCondState.TheCond = AsmCond::IfCond;
-
-  if (TheCondState.Ignore) {
-    EatToEndOfStatement();
-  } else {
-    if (ParseIdentifier(Name))
-      return TokError("expected identifier after '.ifdef'");
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool AsmParser::ParseDirectiveCFIRememberState() {
+  getStreamer().EmitCFIRememberState();
+  return false;
+}

-    Lex();
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_restore_state
+bool AsmParser::ParseDirectiveCFIRestoreState() {
+  getStreamer().EmitCFIRestoreState();
+  return false;
+}

-    MCSymbol *Sym = getContext().LookupSymbol(Name);
+/// ParseDirectiveCFISameValue
+/// ::= .cfi_same_value register
+bool AsmParser::ParseDirectiveCFISameValue(SMLoc DirectiveLoc) {
+  int64_t Register = 0;

-    if (expect_defined)
-      TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
-    else
-      TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
-    TheCondState.Ignore = !TheCondState.CondMet;
-  }
+  if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+    return true;

+  getStreamer().EmitCFISameValue(Register);
   return false;
 }

-/// 
ParseDirectiveElseIf -/// ::= .elseif expression -bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { - if (TheCondState.TheCond != AsmCond::IfCond && - TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " - " an .elseif"); - TheCondState.TheCond = AsmCond::ElseIfCond; +/// ParseDirectiveCFIRestore +/// ::= .cfi_restore register +bool AsmParser::ParseDirectiveCFIRestore(SMLoc DirectiveLoc) { + int64_t Register = 0; + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; - bool LastIgnoreState = false; - if (!TheCondStack.empty()) - LastIgnoreState = TheCondStack.back().Ignore; - if (LastIgnoreState || TheCondState.CondMet) { - TheCondState.Ignore = true; - EatToEndOfStatement(); - } - else { - int64_t ExprValue; - if (ParseAbsoluteExpression(ExprValue)) - return true; + getStreamer().EmitCFIRestore(Register); + return false; +} - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.elseif' directive"); +/// ParseDirectiveCFIEscape +/// ::= .cfi_escape expression[,...] +bool AsmParser::ParseDirectiveCFIEscape() { + std::string Values; + int64_t CurrValue; + if (ParseAbsoluteExpression(CurrValue)) + return true; + + Values.push_back((uint8_t)CurrValue); + while (getLexer().is(AsmToken::Comma)) { Lex(); - TheCondState.CondMet = ExprValue; - TheCondState.Ignore = !TheCondState.CondMet; + + if (ParseAbsoluteExpression(CurrValue)) + return true; + + Values.push_back((uint8_t)CurrValue); } + getStreamer().EmitCFIEscape(Values); return false; } -/// ParseDirectiveElse -/// ::= .else -bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { +/// ParseDirectiveCFISignalFrame +/// ::= .cfi_signal_frame +bool AsmParser::ParseDirectiveCFISignalFrame() { if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.else' directive"); + return Error(getLexer().getLoc(), + "unexpected token in '.cfi_signal_frame'"); - Lex(); + getStreamer().EmitCFISignalFrame(); + return false; +} - if (TheCondState.TheCond != AsmCond::IfCond && - TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " - ".elseif"); - TheCondState.TheCond = AsmCond::ElseCond; - bool LastIgnoreState = false; - if (!TheCondStack.empty()) - LastIgnoreState = TheCondStack.back().Ignore; - if (LastIgnoreState || TheCondState.CondMet) - TheCondState.Ignore = true; - else - TheCondState.Ignore = false; +/// ParseDirectiveCFIUndefined +/// ::= .cfi_undefined register +bool AsmParser::ParseDirectiveCFIUndefined(SMLoc DirectiveLoc) { + int64_t Register = 0; + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + + getStreamer().EmitCFIUndefined(Register); return false; } -/// ParseDirectiveEndIf -/// ::= .endif -bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { +/// ParseDirectiveMacrosOnOff +/// ::= .macros_on +/// ::= .macros_off +bool AsmParser::ParseDirectiveMacrosOnOff(StringRef Directive) { if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.endif' directive"); - - Lex(); - - if ((TheCondState.TheCond == AsmCond::NoCond) || - TheCondStack.empty()) - Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " - ".else"); - if (!TheCondStack.empty()) { - TheCondState = TheCondStack.back(); - TheCondStack.pop_back(); - } + return Error(getLexer().getLoc(), + "unexpected token in '" + Directive + "' directive"); + SetMacrosEnabled(Directive == 
".macros_on"); return false; } -void AsmParser::initializeDirectiveKindMapping() { - DirectiveKindMapping[".set"] = DK_SET; - DirectiveKindMapping[".equ"] = DK_EQU; - DirectiveKindMapping[".equiv"] = DK_EQUIV; - DirectiveKindMapping[".ascii"] = DK_ASCII; - DirectiveKindMapping[".asciz"] = DK_ASCIZ; - DirectiveKindMapping[".string"] = DK_STRING; - DirectiveKindMapping[".byte"] = DK_BYTE; - DirectiveKindMapping[".short"] = DK_SHORT; - DirectiveKindMapping[".value"] = DK_VALUE; - DirectiveKindMapping[".2byte"] = DK_2BYTE; - DirectiveKindMapping[".long"] = DK_LONG; - DirectiveKindMapping[".int"] = DK_INT; - DirectiveKindMapping[".4byte"] = DK_4BYTE; - DirectiveKindMapping[".quad"] = DK_QUAD; - DirectiveKindMapping[".8byte"] = DK_8BYTE; - DirectiveKindMapping[".single"] = DK_SINGLE; - DirectiveKindMapping[".float"] = DK_FLOAT; - DirectiveKindMapping[".double"] = DK_DOUBLE; - DirectiveKindMapping[".align"] = DK_ALIGN; - DirectiveKindMapping[".align32"] = DK_ALIGN32; - DirectiveKindMapping[".balign"] = DK_BALIGN; - DirectiveKindMapping[".balignw"] = DK_BALIGNW; - DirectiveKindMapping[".balignl"] = DK_BALIGNL; - DirectiveKindMapping[".p2align"] = DK_P2ALIGN; - DirectiveKindMapping[".p2alignw"] = DK_P2ALIGNW; - DirectiveKindMapping[".p2alignl"] = DK_P2ALIGNL; - DirectiveKindMapping[".org"] = DK_ORG; - DirectiveKindMapping[".fill"] = DK_FILL; - DirectiveKindMapping[".zero"] = DK_ZERO; - DirectiveKindMapping[".extern"] = DK_EXTERN; - DirectiveKindMapping[".globl"] = DK_GLOBL; - DirectiveKindMapping[".global"] = DK_GLOBAL; - DirectiveKindMapping[".indirect_symbol"] = DK_INDIRECT_SYMBOL; - DirectiveKindMapping[".lazy_reference"] = DK_LAZY_REFERENCE; - DirectiveKindMapping[".no_dead_strip"] = DK_NO_DEAD_STRIP; - DirectiveKindMapping[".symbol_resolver"] = DK_SYMBOL_RESOLVER; - DirectiveKindMapping[".private_extern"] = DK_PRIVATE_EXTERN; - DirectiveKindMapping[".reference"] = DK_REFERENCE; - DirectiveKindMapping[".weak_definition"] = DK_WEAK_DEFINITION; - DirectiveKindMapping[".weak_reference"] = DK_WEAK_REFERENCE; - DirectiveKindMapping[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN; - DirectiveKindMapping[".comm"] = DK_COMM; - DirectiveKindMapping[".common"] = DK_COMMON; - DirectiveKindMapping[".lcomm"] = DK_LCOMM; - DirectiveKindMapping[".abort"] = DK_ABORT; - DirectiveKindMapping[".include"] = DK_INCLUDE; - DirectiveKindMapping[".incbin"] = DK_INCBIN; - DirectiveKindMapping[".code16"] = DK_CODE16; - DirectiveKindMapping[".code16gcc"] = DK_CODE16GCC; - DirectiveKindMapping[".rept"] = DK_REPT; - DirectiveKindMapping[".irp"] = DK_IRP; - DirectiveKindMapping[".irpc"] = DK_IRPC; - DirectiveKindMapping[".endr"] = DK_ENDR; - DirectiveKindMapping[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE; - DirectiveKindMapping[".bundle_lock"] = DK_BUNDLE_LOCK; - DirectiveKindMapping[".bundle_unlock"] = DK_BUNDLE_UNLOCK; - DirectiveKindMapping[".if"] = DK_IF; - DirectiveKindMapping[".ifb"] = DK_IFB; - DirectiveKindMapping[".ifnb"] = DK_IFNB; - DirectiveKindMapping[".ifc"] = DK_IFC; - DirectiveKindMapping[".ifnc"] = DK_IFNC; - DirectiveKindMapping[".ifdef"] = DK_IFDEF; - DirectiveKindMapping[".ifndef"] = DK_IFNDEF; - DirectiveKindMapping[".ifnotdef"] = DK_IFNOTDEF; - DirectiveKindMapping[".elseif"] = DK_ELSEIF; - DirectiveKindMapping[".else"] = DK_ELSE; - DirectiveKindMapping[".endif"] = DK_ENDIF; -} +/// ParseDirectiveMacro +/// ::= .macro name [parameters] +bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in '.macro' 
directive");

-/// ParseDirectiveFile
-/// ::= .file [number] filename
-/// ::= .file number directory filename
-bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
-  // FIXME: I'm not sure what this is.
-  int64_t FileNumber = -1;
-  SMLoc FileNumberLoc = getLexer().getLoc();
-  if (getLexer().is(AsmToken::Integer)) {
-    FileNumber = getTok().getIntVal();
-    Lex();
+  MCAsmMacroParameters Parameters;
+  // Argument delimiter is initially unknown. It will be set by
+  // ParseMacroArgument()
+  AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      MCAsmMacroParameter Parameter;
+      if (ParseIdentifier(Parameter.first))
+        return TokError("expected identifier in '.macro' directive");

-    if (FileNumber < 1)
-      return TokError("file number less than one");
-  }
+      if (getLexer().is(AsmToken::Equal)) {
+        Lex();
+        if (ParseMacroArgument(Parameter.second, ArgumentDelimiter))
+          return true;
+      }

-  if (getLexer().isNot(AsmToken::String))
-    return TokError("unexpected token in '.file' directive");
+      Parameters.push_back(Parameter);

-  // Usually the directory and filename together, otherwise just the directory.
-  StringRef Path = getTok().getString();
-  Path = Path.substr(1, Path.size()-2);
+      if (getLexer().is(AsmToken::Comma))
+        Lex();
+      else if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+    }
+  }
+
+  // Eat the end of statement.
   Lex();

-  StringRef Directory;
-  StringRef Filename;
-  if (getLexer().is(AsmToken::String)) {
-    if (FileNumber == -1)
-      return TokError("explicit path specified, but no file number");
-    Filename = getTok().getString();
-    Filename = Filename.substr(1, Filename.size()-2);
-    Directory = Path;
-    Lex();
-  } else {
-    Filename = Path;
-  }
+  AsmToken EndToken, StartToken = getTok();

-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in '.file' directive");
+  // Lex the macro definition.
+  for (;;) {
+    // Check whether we have reached the end of the file.
+    if (getLexer().is(AsmToken::Eof))
+      return Error(DirectiveLoc, "no matching '.endmacro' in definition");

-  if (FileNumber == -1)
-    getStreamer().EmitFileDirective(Filename);
-  else {
-    if (getContext().getGenDwarfForAssembly() == true)
-      Error(DirectiveLoc, "input can't have .file dwarf directives when -g is "
-            "used to generate dwarf debug info for assembly code");
+    // Otherwise, check whether we have reached the .endmacro.
+    if (getLexer().is(AsmToken::Identifier) &&
+        (getTok().getIdentifier() == ".endm" ||
+         getTok().getIdentifier() == ".endmacro")) {
+      EndToken = getTok();
+      Lex();
+      if (getLexer().isNot(AsmToken::EndOfStatement))
+        return TokError("unexpected token in '" + EndToken.getIdentifier() +
+                        "' directive");
+      break;
+    }

-    if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename))
-      Error(FileNumberLoc, "file number already allocated");
+    // Otherwise, scan until the end of the statement.
+ EatToEndOfStatement(); } + if (LookupMacro(Name)) { + return Error(DirectiveLoc, "macro '" + Name + "' is already defined"); + } + + const char *BodyStart = StartToken.getLoc().getPointer(); + const char *BodyEnd = EndToken.getLoc().getPointer(); + StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); + DefineMacro(Name, MCAsmMacro(Name, Body, Parameters)); return false; } -/// ParseDirectiveLine -/// ::= .line [number] -bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - if (getLexer().isNot(AsmToken::Integer)) - return TokError("unexpected token in '.line' directive"); - - int64_t LineNumber = getTok().getIntVal(); - (void) LineNumber; - Lex(); +/// ParseDirectiveEndMacro +/// ::= .endm +/// ::= .endmacro +bool AsmParser::ParseDirectiveEndMacro(StringRef Directive) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '" + Directive + "' directive"); - // FIXME: Do something with the .line. + // If we are inside a macro instantiation, terminate the current + // instantiation. + if (InsideMacroInstantiation()) { + HandleMacroExit(); + return false; } + // Otherwise, this .endmacro is a stray entry in the file; well formed + // .endmacro directives are handled during the macro definition parsing. + return TokError("unexpected '" + Directive + "' in file, " + "no current macro definition"); +} + +/// ParseDirectivePurgeMacro +/// ::= .purgem +bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in '.purgem' directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.line' directive"); + return TokError("unexpected token in '.purgem' directive"); + + if (!LookupMacro(Name)) + return Error(DirectiveLoc, "macro '" + Name + "' is not defined"); + UndefineMacro(Name); return false; } +/// ParseDirectiveBundleAlignMode +/// ::= {.bundle_align_mode} expression +bool AsmParser::ParseDirectiveBundleAlignMode() { + CheckForValidSection(); -/// ParseDirectiveLoc -/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end] -/// [epilogue_begin] [is_stmt VALUE] [isa VALUE] -/// The first number is a file number, must have been previously assigned with -/// a .file directive, the second number is the line number and optionally the -/// third number is a column position (zero if not specified). The remaining -/// optional items are .loc sub-directives. -bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + // Expect a single argument: an expression that evaluates to a constant + // in the inclusive range 0-30. 
+ SMLoc ExprLoc = getLexer().getLoc(); + int64_t AlignSizePow2; + if (ParseAbsoluteExpression(AlignSizePow2)) + return true; + else if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token after expression in" + " '.bundle_align_mode' directive"); + else if (AlignSizePow2 < 0 || AlignSizePow2 > 30) + return Error(ExprLoc, + "invalid bundle alignment size (expected between 0 and 30)"); - if (getLexer().isNot(AsmToken::Integer)) - return TokError("unexpected token in '.loc' directive"); - int64_t FileNumber = getTok().getIntVal(); - if (FileNumber < 1) - return TokError("file number less than one in '.loc' directive"); - if (!getContext().isValidDwarfFileNumber(FileNumber)) - return TokError("unassigned file number in '.loc' directive"); Lex(); - int64_t LineNumber = 0; - if (getLexer().is(AsmToken::Integer)) { - LineNumber = getTok().getIntVal(); - if (LineNumber < 1) - return TokError("line number less than one in '.loc' directive"); - Lex(); - } + // Because of AlignSizePow2's verified range we can safely truncate it to + // unsigned. + getStreamer().EmitBundleAlignMode(static_cast(AlignSizePow2)); + return false; +} - int64_t ColumnPos = 0; - if (getLexer().is(AsmToken::Integer)) { - ColumnPos = getTok().getIntVal(); - if (ColumnPos < 0) - return TokError("column position less than zero in '.loc' directive"); - Lex(); - } +/// ParseDirectiveBundleLock +/// ::= {.bundle_lock} [align_to_end] +bool AsmParser::ParseDirectiveBundleLock() { + CheckForValidSection(); + bool AlignToEnd = false; - unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0; - unsigned Isa = 0; - int64_t Discriminator = 0; if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - StringRef Name; - SMLoc Loc = getTok().getLoc(); - if (getParser().ParseIdentifier(Name)) - return TokError("unexpected token in '.loc' directive"); + StringRef Option; + SMLoc Loc = getTok().getLoc(); + const char *kInvalidOptionError = + "invalid option for '.bundle_lock' directive"; - if (Name == "basic_block") - Flags |= DWARF2_FLAG_BASIC_BLOCK; - else if (Name == "prologue_end") - Flags |= DWARF2_FLAG_PROLOGUE_END; - else if (Name == "epilogue_begin") - Flags |= DWARF2_FLAG_EPILOGUE_BEGIN; - else if (Name == "is_stmt") { - Loc = getTok().getLoc(); - const MCExpr *Value; - if (getParser().ParseExpression(Value)) - return true; - // The expression must be the constant 0 or 1. - if (const MCConstantExpr *MCE = dyn_cast(Value)) { - int Value = MCE->getValue(); - if (Value == 0) - Flags &= ~DWARF2_FLAG_IS_STMT; - else if (Value == 1) - Flags |= DWARF2_FLAG_IS_STMT; - else - return Error(Loc, "is_stmt value not 0 or 1"); - } - else { - return Error(Loc, "is_stmt value not the constant value of 0 or 1"); - } - } - else if (Name == "isa") { - Loc = getTok().getLoc(); - const MCExpr *Value; - if (getParser().ParseExpression(Value)) - return true; - // The expression must be a constant greater or equal to 0. 
-        if (const MCConstantExpr *MCE = dyn_cast(Value)) {
-          int Value = MCE->getValue();
-          if (Value < 0)
-            return Error(Loc, "isa number less than zero");
-          Isa = Value;
-        }
-        else {
-          return Error(Loc, "isa number not a constant value");
-        }
-      }
-      else if (Name == "discriminator") {
-        if (getParser().ParseAbsoluteExpression(Discriminator))
-          return true;
-      }
-      else {
-        return Error(Loc, "unknown sub-directive in '.loc' directive");
-      }
+    if (ParseIdentifier(Option))
+      return Error(Loc, kInvalidOptionError);

-      if (getLexer().is(AsmToken::EndOfStatement))
-        break;
-    }
+    if (Option != "align_to_end")
+      return Error(Loc, kInvalidOptionError);
+    else if (getLexer().isNot(AsmToken::EndOfStatement))
+      return Error(Loc,
+                   "unexpected token after '.bundle_lock' directive option");
+    AlignToEnd = true;
   }

-  getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
-                                      Isa, Discriminator, StringRef());
+  Lex();
+  getStreamer().EmitBundleLock(AlignToEnd);

   return false;
 }

-/// ParseDirectiveStabs
-/// ::= .stabs string, number, number, number
-bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
-                                           SMLoc DirectiveLoc) {
-  return TokError("unsupported directive '" + Directive + "'");
+/// ParseDirectiveBundleUnlock
+/// ::= {.bundle_unlock}
+bool AsmParser::ParseDirectiveBundleUnlock() {
+  CheckForValidSection();
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.bundle_unlock' directive");
+  Lex();
+
+  getStreamer().EmitBundleUnlock();
+  return false;
 }

 /// ParseDirectiveSpace
-/// ::= .space expression [ , expression ]
-bool GenericAsmParser::ParseDirectiveSpace(StringRef, SMLoc DirectiveLoc) {
-  getParser().CheckForValidSection();
+/// ::= (.skip | .space) expression [ , expression ]
+bool AsmParser::ParseDirectiveSpace(StringRef IDVal) {
+  CheckForValidSection();

   int64_t NumBytes;
-  if (getParser().ParseAbsoluteExpression(NumBytes))
+  if (ParseAbsoluteExpression(NumBytes))
     return true;

   int64_t FillExpr = 0;
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     if (getLexer().isNot(AsmToken::Comma))
-      return TokError("unexpected token in '.space' directive");
+      return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
     Lex();

-    if (getParser().ParseAbsoluteExpression(FillExpr))
+    if (ParseAbsoluteExpression(FillExpr))
       return true;

     if (getLexer().isNot(AsmToken::EndOfStatement))
-      return TokError("unexpected token in '.space' directive");
-  }
-
-  Lex();
-
-  if (NumBytes <= 0)
-    return TokError("invalid number of bytes in '.space' directive");
-
-  // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
- getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); - - return false; -} - -/// ParseDirectiveCFISections -/// ::= .cfi_sections section [, section] -bool GenericAsmParser::ParseDirectiveCFISections(StringRef, - SMLoc DirectiveLoc) { - StringRef Name; - bool EH = false; - bool Debug = false; - - if (getParser().ParseIdentifier(Name)) - return TokError("Expected an identifier"); - - if (Name == ".eh_frame") - EH = true; - else if (Name == ".debug_frame") - Debug = true; - - if (getLexer().is(AsmToken::Comma)) { - Lex(); - - if (getParser().ParseIdentifier(Name)) - return TokError("Expected an identifier"); - - if (Name == ".eh_frame") - EH = true; - else if (Name == ".debug_frame") - Debug = true; - } - - getStreamer().EmitCFISections(EH, Debug); - - return false; -} - -/// ParseDirectiveCFIStartProc -/// ::= .cfi_startproc -bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef, - SMLoc DirectiveLoc) { - getStreamer().EmitCFIStartProc(); - return false; -} + return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); + } -/// ParseDirectiveCFIEndProc -/// ::= .cfi_endproc -bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) { - getStreamer().EmitCFIEndProc(); - return false; -} + Lex(); -/// ParseRegisterOrRegisterNumber - parse register name or number. -bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register, - SMLoc DirectiveLoc) { - unsigned RegNo; + if (NumBytes <= 0) + return TokError("invalid number of bytes in '" + + Twine(IDVal) + "' directive"); - if (getLexer().isNot(AsmToken::Integer)) { - if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc, - DirectiveLoc)) - return true; - Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true); - } else - return getParser().ParseAbsoluteExpression(Register); + // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); return false; } -/// ParseDirectiveCFIDefCfa -/// ::= .cfi_def_cfa register, offset -bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef, - SMLoc DirectiveLoc) { - int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) +/// ParseDirectiveLEB128 +/// ::= (.sleb128 | .uleb128) expression +bool AsmParser::ParseDirectiveLEB128(bool Signed) { + CheckForValidSection(); + const MCExpr *Value; + + if (ParseExpression(Value)) return true; - if (getLexer().isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - Lex(); - int64_t Offset = 0; - if (getParser().ParseAbsoluteExpression(Offset)) - return true; + if (Signed) + getStreamer().EmitSLEB128Value(Value); + else + getStreamer().EmitULEB128Value(Value); - getStreamer().EmitCFIDefCfa(Register, Offset); return false; } -/// ParseDirectiveCFIDefCfaOffset -/// ::= .cfi_def_cfa_offset offset -bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef, - SMLoc DirectiveLoc) { - int64_t Offset = 0; - if (getParser().ParseAbsoluteExpression(Offset)) - return true; +/// ParseDirectiveSymbolAttribute +/// ::= { ".globl", ".weak", ... 
} [ identifier ( , identifier )* ] +bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + SMLoc Loc = getTok().getLoc(); - getStreamer().EmitCFIDefCfaOffset(Offset); - return false; -} + if (ParseIdentifier(Name)) + return Error(Loc, "expected identifier in directive"); -/// ParseDirectiveCFIAdjustCfaOffset -/// ::= .cfi_adjust_cfa_offset adjustment -bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef, - SMLoc DirectiveLoc) { - int64_t Adjustment = 0; - if (getParser().ParseAbsoluteExpression(Adjustment)) - return true; + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - getStreamer().EmitCFIAdjustCfaOffset(Adjustment); - return false; -} + // Assembler local symbols don't make any sense here. Complain loudly. + if (Sym->isTemporary()) + return Error(Loc, "non-local symbol required in directive"); -/// ParseDirectiveCFIDefCfaRegister -/// ::= .cfi_def_cfa_register register -bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef, - SMLoc DirectiveLoc) { - int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) - return true; + getStreamer().EmitSymbolAttribute(Sym, Attr); - getStreamer().EmitCFIDefCfaRegister(Register); + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); return false; } -/// ParseDirectiveCFIOffset -/// ::= .cfi_offset register, offset -bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) { - int64_t Register = 0; - int64_t Offset = 0; +/// ParseDirectiveComm +/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] +bool AsmParser::ParseDirectiveComm(bool IsLocal) { + CheckForValidSection(); - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) - return true; + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); - if (getParser().ParseAbsoluteExpression(Offset)) + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Size)) return true; - getStreamer().EmitCFIOffset(Register, Offset); - return false; -} + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; -/// ParseDirectiveCFIRelOffset -/// ::= .cfi_rel_offset register, offset -bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef, - SMLoc DirectiveLoc) { - int64_t Register = 0; + LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType(); + if (IsLocal && LCOMM == LCOMM::NoAlignment) + return Error(Pow2AlignmentLoc, "alignment not supported on this target"); - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) - return true; + // If this target takes alignments in bytes (not log) validate and convert. 
+ if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) || + (IsLocal && LCOMM == LCOMM::ByteAlignment)) { + if (!isPowerOf2_64(Pow2Alignment)) + return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); + Pow2Alignment = Log2_64(Pow2Alignment); + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); Lex(); - int64_t Offset = 0; - if (getParser().ParseAbsoluteExpression(Offset)) - return true; + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); - getStreamer().EmitCFIRelOffset(Register, Offset); + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the Symbol as a common or local common with Size and Pow2Alignment + if (IsLocal) { + getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment); + return false; + } + + getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); return false; } -static bool isValidEncoding(int64_t Encoding) { - if (Encoding & ~0xff) - return false; +/// ParseDirectiveAbort +/// ::= .abort [... message ...] +bool AsmParser::ParseDirectiveAbort() { + // FIXME: Use loc from directive. + SMLoc Loc = getLexer().getLoc(); - if (Encoding == dwarf::DW_EH_PE_omit) - return true; + StringRef Str = ParseStringToEndOfStatement(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.abort' directive"); - const unsigned Format = Encoding & 0xf; - if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 && - Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 && - Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 && - Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed) - return false; + Lex(); - const unsigned Application = Encoding & 0x70; - if (Application != dwarf::DW_EH_PE_absptr && - Application != dwarf::DW_EH_PE_pcrel) - return false; + if (Str.empty()) + Error(Loc, ".abort detected. Assembly stopping."); + else + Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); + // FIXME: Actually abort assembly here. 
- return true; + return false; } -/// ParseDirectiveCFIPersonalityOrLsda -/// ::= .cfi_personality encoding, [symbol_name] -/// ::= .cfi_lsda encoding, [symbol_name] -bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal, - SMLoc DirectiveLoc) { - int64_t Encoding = 0; - if (getParser().ParseAbsoluteExpression(Encoding)) - return true; - if (Encoding == dwarf::DW_EH_PE_omit) - return false; - - if (!isValidEncoding(Encoding)) - return TokError("unsupported encoding."); +/// ParseDirectiveInclude +/// ::= .include "filename" +bool AsmParser::ParseDirectiveInclude() { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.include' directive"); - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); + std::string Filename = getTok().getString(); + SMLoc IncludeLoc = getLexer().getLoc(); Lex(); - StringRef Name; - if (getParser().ParseIdentifier(Name)) - return TokError("expected identifier in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.include' directive"); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); - if (IDVal == ".cfi_personality") - getStreamer().EmitCFIPersonality(Sym, Encoding); - else { - assert(IDVal == ".cfi_lsda"); - getStreamer().EmitCFILsda(Sym, Encoding); + // Attempt to switch the lexer to the included file before consuming the end + // of statement to avoid losing it when we switch. + if (EnterIncludeFile(Filename)) { + Error(IncludeLoc, "Could not find include file '" + Filename + "'"); + return true; } - return false; -} - -/// ParseDirectiveCFIRememberState -/// ::= .cfi_remember_state -bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal, - SMLoc DirectiveLoc) { - getStreamer().EmitCFIRememberState(); - return false; -} -/// ParseDirectiveCFIRestoreState -/// ::= .cfi_remember_state -bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal, - SMLoc DirectiveLoc) { - getStreamer().EmitCFIRestoreState(); return false; } -/// ParseDirectiveCFISameValue -/// ::= .cfi_same_value register -bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal, - SMLoc DirectiveLoc) { - int64_t Register = 0; +/// ParseDirectiveIncbin +/// ::= .incbin "filename" +bool AsmParser::ParseDirectiveIncbin() { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.incbin' directive"); - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) - return true; + std::string Filename = getTok().getString(); + SMLoc IncbinLoc = getLexer().getLoc(); + Lex(); - getStreamer().EmitCFISameValue(Register); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.incbin' directive"); - return false; -} + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); -/// ParseDirectiveCFIRestore -/// ::= .cfi_restore register -bool GenericAsmParser::ParseDirectiveCFIRestore(StringRef IDVal, - SMLoc DirectiveLoc) { - int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + // Attempt to process the included file. + if (ProcessIncbinFile(Filename)) { + Error(IncbinLoc, "Could not find incbin file '" + Filename + "'"); return true; - - getStreamer().EmitCFIRestore(Register); + } return false; } -/// ParseDirectiveCFIEscape -/// ::= .cfi_escape expression[,...] 
-bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal, - SMLoc DirectiveLoc) { - std::string Values; - int64_t CurrValue; - if (getParser().ParseAbsoluteExpression(CurrValue)) - return true; +/// ParseDirectiveIf +/// ::= .if expression +bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; - Values.push_back((uint8_t)CurrValue); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.if' directive"); - while (getLexer().is(AsmToken::Comma)) { Lex(); - if (getParser().ParseAbsoluteExpression(CurrValue)) - return true; - - Values.push_back((uint8_t)CurrValue); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; } - getStreamer().EmitCFIEscape(Values); return false; } -/// ParseDirectiveCFISignalFrame -/// ::= .cfi_signal_frame -bool GenericAsmParser::ParseDirectiveCFISignalFrame(StringRef Directive, - SMLoc DirectiveLoc) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(getLexer().getLoc(), - "unexpected token in '" + Directive + "' directive"); - - getStreamer().EmitCFISignalFrame(); +/// ParseDirectiveIfb +/// ::= .ifb string +bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; - return false; -} + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + StringRef Str = ParseStringToEndOfStatement(); -/// ParseDirectiveCFIUndefined -/// ::= .cfi_undefined register -bool GenericAsmParser::ParseDirectiveCFIUndefined(StringRef Directive, - SMLoc DirectiveLoc) { - int64_t Register = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.ifb' directive"); - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) - return true; + Lex(); - getStreamer().EmitCFIUndefined(Register); + TheCondState.CondMet = ExpectBlank == Str.empty(); + TheCondState.Ignore = !TheCondState.CondMet; + } return false; } -/// ParseDirectiveCFIRegister -/// ::= .cfi_register register, register -bool GenericAsmParser::ParseDirectiveCFIRegister(StringRef Directive, - SMLoc DirectiveLoc) { - int64_t Register1 = 0; - - if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc)) - return true; +/// ParseDirectiveIfc +/// ::= .ifc string1, string2 +bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + StringRef Str1 = ParseStringToComma(); - int64_t Register2 = 0; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.ifc' directive"); - if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc)) - return true; + Lex(); - getStreamer().EmitCFIRegister(Register1, Register2); + StringRef Str2 = ParseStringToEndOfStatement(); - return false; -} + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.ifc' directive"); -/// ParseDirectiveMacrosOnOff -/// ::= .macros_on -/// ::= .macros_off -bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive, - SMLoc DirectiveLoc) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - 
return Error(getLexer().getLoc(), - "unexpected token in '" + Directive + "' directive"); + Lex(); - getParser().SetMacrosEnabled(Directive == ".macros_on"); + TheCondState.CondMet = ExpectEqual == (Str1 == Str2); + TheCondState.Ignore = !TheCondState.CondMet; + } return false; } -/// ParseDirectiveMacro -/// ::= .macro name [parameters] -bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, - SMLoc DirectiveLoc) { +/// ParseDirectiveIfdef +/// ::= .ifdef symbol +bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { StringRef Name; - if (getParser().ParseIdentifier(Name)) - return TokError("expected identifier in '.macro' directive"); + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; - MCAsmMacroParameters Parameters; - // Argument delimiter is initially unknown. It will be set by - // ParseMacroArgument() - AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof; - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - MCAsmMacroParameter Parameter; - if (getParser().ParseIdentifier(Parameter.first)) - return TokError("expected identifier in '.macro' directive"); + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + if (ParseIdentifier(Name)) + return TokError("expected identifier after '.ifdef'"); - if (getLexer().is(AsmToken::Equal)) { - Lex(); - if (getParser().ParseMacroArgument(Parameter.second, ArgumentDelimiter)) - return true; - } + Lex(); - Parameters.push_back(Parameter); + MCSymbol *Sym = getContext().LookupSymbol(Name); - if (getLexer().is(AsmToken::Comma)) - Lex(); - else if (getLexer().is(AsmToken::EndOfStatement)) - break; - } + if (expect_defined) + TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined()); + else + TheCondState.CondMet = (Sym == NULL || Sym->isUndefined()); + TheCondState.Ignore = !TheCondState.CondMet; } - // Eat the end of statement. - Lex(); - - AsmToken EndToken, StartToken = getTok(); - - // Lex the macro definition. - for (;;) { - // Check whether we have reached the end of the file. - if (getLexer().is(AsmToken::Eof)) - return Error(DirectiveLoc, "no matching '.endmacro' in definition"); + return false; +} - // Otherwise, check whether we have reach the .endmacro. - if (getLexer().is(AsmToken::Identifier) && - (getTok().getIdentifier() == ".endm" || - getTok().getIdentifier() == ".endmacro")) { - EndToken = getTok(); - Lex(); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '" + EndToken.getIdentifier() + - "' directive"); - break; - } +/// ParseDirectiveElseIf +/// ::= .elseif expression +bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); + TheCondState.TheCond = AsmCond::ElseIfCond; - // Otherwise, scan til the end of the statement. 
- getParser().EatToEndOfStatement(); + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) { + TheCondState.Ignore = true; + EatToEndOfStatement(); } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; - if (getParser().LookupMacro(Name)) { - return Error(DirectiveLoc, "macro '" + Name + "' is already defined"); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.elseif' directive"); + + Lex(); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; } - const char *BodyStart = StartToken.getLoc().getPointer(); - const char *BodyEnd = EndToken.getLoc().getPointer(); - StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); - getParser().DefineMacro(Name, MCAsmMacro(Name, Body, Parameters)); return false; } -/// ParseDirectiveEndMacro -/// ::= .endm -/// ::= .endmacro -bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, - SMLoc DirectiveLoc) { +/// ParseDirectiveElse +/// ::= .else +bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '" + Directive + "' directive"); - - // If we are inside a macro instantiation, terminate the current - // instantiation. - if (getParser().InsideMacroInstantiation()) { - getParser().HandleMacroExit(); - return false; - } - - // Otherwise, this .endmacro is a stray entry in the file; well formed - // .endmacro directives are handled during the macro definition parsing. - return TokError("unexpected '" + Directive + "' in file, " - "no current macro definition"); -} - -/// ParseDirectivePurgeMacro -/// ::= .purgem -bool GenericAsmParser::ParseDirectivePurgeMacro(StringRef Directive, - SMLoc DirectiveLoc) { - StringRef Name; - if (getParser().ParseIdentifier(Name)) - return TokError("expected identifier in '.purgem' directive"); + return TokError("unexpected token in '.else' directive"); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.purgem' directive"); + Lex(); - if (!getParser().LookupMacro(Name)) - return Error(DirectiveLoc, "macro '" + Name + "' is not defined"); + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); + TheCondState.TheCond = AsmCond::ElseCond; + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) + TheCondState.Ignore = true; + else + TheCondState.Ignore = false; - getParser().UndefineMacro(Name); return false; } -bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { - getParser().CheckForValidSection(); - - const MCExpr *Value; - - if (getParser().ParseExpression(Value)) - return true; - +/// ParseDirectiveEndIf +/// ::= .endif +bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("unexpected token in '.endif' directive"); - if (DirName[1] == 's') - getStreamer().EmitSLEB128Value(Value); - else - getStreamer().EmitULEB128Value(Value); + Lex(); + + if ((TheCondState.TheCond == AsmCond::NoCond) || + TheCondStack.empty()) + Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " + ".else"); + if 
(!TheCondStack.empty()) { + TheCondState = TheCondStack.back(); + TheCondStack.pop_back(); + } return false; } +void AsmParser::initializeDirectiveKindMap() { + DirectiveKindMap[".set"] = DK_SET; + DirectiveKindMap[".equ"] = DK_EQU; + DirectiveKindMap[".equiv"] = DK_EQUIV; + DirectiveKindMap[".ascii"] = DK_ASCII; + DirectiveKindMap[".asciz"] = DK_ASCIZ; + DirectiveKindMap[".string"] = DK_STRING; + DirectiveKindMap[".byte"] = DK_BYTE; + DirectiveKindMap[".short"] = DK_SHORT; + DirectiveKindMap[".value"] = DK_VALUE; + DirectiveKindMap[".2byte"] = DK_2BYTE; + DirectiveKindMap[".long"] = DK_LONG; + DirectiveKindMap[".int"] = DK_INT; + DirectiveKindMap[".4byte"] = DK_4BYTE; + DirectiveKindMap[".quad"] = DK_QUAD; + DirectiveKindMap[".8byte"] = DK_8BYTE; + DirectiveKindMap[".single"] = DK_SINGLE; + DirectiveKindMap[".float"] = DK_FLOAT; + DirectiveKindMap[".double"] = DK_DOUBLE; + DirectiveKindMap[".align"] = DK_ALIGN; + DirectiveKindMap[".align32"] = DK_ALIGN32; + DirectiveKindMap[".balign"] = DK_BALIGN; + DirectiveKindMap[".balignw"] = DK_BALIGNW; + DirectiveKindMap[".balignl"] = DK_BALIGNL; + DirectiveKindMap[".p2align"] = DK_P2ALIGN; + DirectiveKindMap[".p2alignw"] = DK_P2ALIGNW; + DirectiveKindMap[".p2alignl"] = DK_P2ALIGNL; + DirectiveKindMap[".org"] = DK_ORG; + DirectiveKindMap[".fill"] = DK_FILL; + DirectiveKindMap[".zero"] = DK_ZERO; + DirectiveKindMap[".extern"] = DK_EXTERN; + DirectiveKindMap[".globl"] = DK_GLOBL; + DirectiveKindMap[".global"] = DK_GLOBAL; + DirectiveKindMap[".indirect_symbol"] = DK_INDIRECT_SYMBOL; + DirectiveKindMap[".lazy_reference"] = DK_LAZY_REFERENCE; + DirectiveKindMap[".no_dead_strip"] = DK_NO_DEAD_STRIP; + DirectiveKindMap[".symbol_resolver"] = DK_SYMBOL_RESOLVER; + DirectiveKindMap[".private_extern"] = DK_PRIVATE_EXTERN; + DirectiveKindMap[".reference"] = DK_REFERENCE; + DirectiveKindMap[".weak_definition"] = DK_WEAK_DEFINITION; + DirectiveKindMap[".weak_reference"] = DK_WEAK_REFERENCE; + DirectiveKindMap[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN; + DirectiveKindMap[".comm"] = DK_COMM; + DirectiveKindMap[".common"] = DK_COMMON; + DirectiveKindMap[".lcomm"] = DK_LCOMM; + DirectiveKindMap[".abort"] = DK_ABORT; + DirectiveKindMap[".include"] = DK_INCLUDE; + DirectiveKindMap[".incbin"] = DK_INCBIN; + DirectiveKindMap[".code16"] = DK_CODE16; + DirectiveKindMap[".code16gcc"] = DK_CODE16GCC; + DirectiveKindMap[".rept"] = DK_REPT; + DirectiveKindMap[".irp"] = DK_IRP; + DirectiveKindMap[".irpc"] = DK_IRPC; + DirectiveKindMap[".endr"] = DK_ENDR; + DirectiveKindMap[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE; + DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK; + DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK; + DirectiveKindMap[".if"] = DK_IF; + DirectiveKindMap[".ifb"] = DK_IFB; + DirectiveKindMap[".ifnb"] = DK_IFNB; + DirectiveKindMap[".ifc"] = DK_IFC; + DirectiveKindMap[".ifnc"] = DK_IFNC; + DirectiveKindMap[".ifdef"] = DK_IFDEF; + DirectiveKindMap[".ifndef"] = DK_IFNDEF; + DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF; + DirectiveKindMap[".elseif"] = DK_ELSEIF; + DirectiveKindMap[".else"] = DK_ELSE; + DirectiveKindMap[".endif"] = DK_ENDIF; + DirectiveKindMap[".skip"] = DK_SKIP; + DirectiveKindMap[".space"] = DK_SPACE; + DirectiveKindMap[".file"] = DK_FILE; + DirectiveKindMap[".line"] = DK_LINE; + DirectiveKindMap[".loc"] = DK_LOC; + DirectiveKindMap[".stabs"] = DK_STABS; + DirectiveKindMap[".sleb128"] = DK_SLEB128; + DirectiveKindMap[".uleb128"] = DK_ULEB128; + DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS; + 
DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC; + DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC; + DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA; + DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET; + DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET; + DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER; + DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; + DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; + DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; + DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA; + DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE; + DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE; + DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE; + DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE; + DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE; + DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME; + DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED; + DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER; + DirectiveKindMap[".macros_on"] = DK_MACROS_ON; + DirectiveKindMap[".macros_off"] = DK_MACROS_OFF; + DirectiveKindMap[".macro"] = DK_MACRO; + DirectiveKindMap[".endm"] = DK_ENDM; + DirectiveKindMap[".endmacro"] = DK_ENDMACRO; + DirectiveKindMap[".purgem"] = DK_PURGEM; +} + + MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { AsmToken EndToken, StartToken = getTok(); -- cgit v1.1 From 1c99a7f4892a24eb227802e042917d05d8cd415f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 15 Jan 2013 23:07:53 +0000 Subject: [ms-inline asm] Address the FIXME in AsmParser.cpp. // FIXME: Constraints are hard coded to 'm', but we need an 'r' // constraint for addressof. This needs to be cleaned up! Test cases are already in place. Specifically, clang/test/CodeGen/ms-inline-asm.c t15(), t16(), and t24(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index ce5ce1d..43c872b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3972,15 +3972,13 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned NumExprs = NumOutputs + NumInputs; OpDecls.resize(NumExprs); Constraints.resize(NumExprs); - // FIXME: Constraints are hard coded to 'm', but we need an 'r' - // constraint for addressof. This needs to be cleaned up! for (unsigned i = 0; i < NumOutputs; ++i) { OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]); - Constraints[i] = OutputDeclsAddressOf[i] ? "=r" : OutputConstraints[i]; + Constraints[i] = OutputConstraints[i]; } for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) { OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]); - Constraints[j] = InputDeclsAddressOf[i] ? "r" : InputConstraints[i]; + Constraints[j] = InputConstraints[i]; } } -- cgit v1.1
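Illustration — a hand-written sketch, not part of any patch in this series — of the constraint handling before and after r172569. The helper names and the "=m" constraint value below are invented for the example; only the before/after behavior mirrors the patch:

#include <cassert>
#include <string>

// Pre-r172569: an address-of operand had its constraint forced to "=r",
// overriding whatever the front end computed.
static std::string oldOutputConstraint(bool IsAddressOf,
                                       const std::string &FromFrontEnd) {
  return IsAddressOf ? "=r" : FromFrontEnd;
}

// Post-r172569: the parser trusts the front end's constraint unconditionally.
static std::string newOutputConstraint(bool /*IsAddressOf*/,
                                       const std::string &FromFrontEnd) {
  return FromFrontEnd;
}

int main() {
  assert(oldOutputConstraint(true, "=m") == "=r"); // front end overridden
  assert(newOutputConstraint(true, "=m") == "=m"); // front end respected
  return 0;
}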
From 9ccb76998f741a7d3f0f217392a783dfb99c6e87 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 15 Jan 2013 23:22:09 +0000 Subject: Optimize the memory usage of MC bundling, by creating a new type of fragment into which we can emit single instructions without fixups (which is most instructions). This is an optimization required because MCDataFragment is pretty large (240 bytes on x64), with no change in functionality. For large programs, this reduces the memory usage overhead required for bundling by 40%. To make the code as palatable as possible, the MCEncodedFragment interface was further fragmented (no pun intended) and MCEncodedFragmentWithFixups is used as the interface to work against when the user expects fixups. MCDataFragment and MCRelaxableFragment implement this interface, while the new MCCompactEncodedInstFragment implements MCEncodedFragment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAssembler.cpp | 36 ++++++++++++++++++++++++++++++++++-- lib/MC/MCELFStreamer.cpp | 14 ++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 5fdc57a..c51ddc8 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -38,6 +38,8 @@ STATISTIC(EmittedRelaxableFragments, "Number of emitted assembler fragments - relaxable"); STATISTIC(EmittedDataFragments, "Number of emitted assembler fragments - data"); +STATISTIC(EmittedCompactEncodedInstFragments, + "Number of emitted assembler fragments - compact encoded inst"); STATISTIC(EmittedAlignFragments, "Number of emitted assembler fragments - align"); STATISTIC(EmittedFillFragments, @@ -222,6 +224,11 @@ MCEncodedFragment::~MCEncodedFragment() { /* *** */ +MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() { +} + +/* *** */ + MCSectionData::MCSectionData() : Section(0) {} MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) @@ -388,6 +395,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, switch (F.getKind()) { case MCFragment::FT_Data: case MCFragment::FT_Relaxable: + case MCFragment::FT_CompactEncodedInst: return cast<MCEncodedFragment>(F).getContents().size(); case MCFragment::FT_Fill: return cast<MCFillFragment>(F).getSize(); @@ -570,6 +578,11 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, writeFragmentContents(F, OW); break; + case MCFragment::FT_CompactEncodedInst: + ++stats::EmittedCompactEncodedInstFragments; + writeFragmentContents(F, OW); + break; + case MCFragment::FT_Fill: { ++stats::EmittedFillFragments; MCFillFragment &FF = cast<MCFillFragment>(F); @@ -742,9 +755,10 @@ void MCAssembler::Finish() { for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { for (MCSectionData::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; ++it2) { - MCEncodedFragment *F = dyn_cast<MCEncodedFragment>(it2); + MCEncodedFragmentWithFixups *F = + dyn_cast<MCEncodedFragmentWithFixups>(it2); if (F) { - for (MCEncodedFragment::fixup_iterator it3 = F->fixup_begin(), + for (MCEncodedFragmentWithFixups::fixup_iterator it3 = F->fixup_begin(), ie3 = F->fixup_end(); it3 != ie3; ++it3) { MCFixup &Fixup = *it3; uint64_t FixedValue = handleFixup(Layout, *F, Fixup); @@ -954,6 +968,8 @@ void MCFragment::dump() { switch (getKind()) { case MCFragment::FT_Align: OS << "MCAlignFragment"; break; case MCFragment::FT_Data: OS << "MCDataFragment"; break; + case MCFragment::FT_CompactEncodedInst: + OS << "MCCompactEncodedInstFragment"; break; case MCFragment::FT_Fill: OS << "MCFillFragment"; break; case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break; case MCFragment::FT_Org: OS << "MCOrgFragment"; break; @@ -1001,6 +1017,19 @@ void MCFragment::dump() { } break; } + case MCFragment::FT_CompactEncodedInst: { + const MCCompactEncodedInstFragment *CEIF = + cast<MCCompactEncodedInstFragment>(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl<char> &Contents = CEIF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + break; + } case MCFragment::FT_Fill: { const MCFillFragment *FF = cast<MCFillFragment>(this); OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() @@ -1094,7 +1123,9 @@ void MCAssembler::dump() { // anchors for MC*Fragment vtables void MCEncodedFragment::anchor() { } +void MCEncodedFragmentWithFixups::anchor() { } void MCDataFragment::anchor() { } +void MCCompactEncodedInstFragment::anchor() { } void MCRelaxableFragment::anchor() { } void MCAlignFragment::anchor() { } void MCFillFragment::anchor() { } @@ -1102,3 +1133,4 @@ void MCOrgFragment::anchor() { } void MCLEBFragment::anchor() { } void MCDwarfLineAddrFragment::anchor() { } void MCDwarfCallFrameFragment::anchor() { } + diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index cae73be..e5b749e 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -371,8 +371,10 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { // data fragment). // // If bundling is enabled: - // - If we're not in a bundle-locked group, emit the instruction into a data - // fragment of its own. + // - If we're not in a bundle-locked group, emit the instruction into a + // fragment of its own. If there are no fixups registered for the + // instruction, emit a MCCompactEncodedInstFragment. Otherwise, emit a + // MCDataFragment. + // - If we're in a bundle-locked group, append the instruction to the current // data fragment because we want all the instructions in a group to get into // the same fragment. Be careful not to do that for the first instruction in @@ -383,6 +385,14 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { MCSectionData *SD = getCurrentSectionData(); if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) DF = getOrCreateDataFragment(); + else if (!SD->isBundleLocked() && Fixups.size() == 0) { + // Optimize memory usage by emitting the instruction to a + // MCCompactEncodedInstFragment when not in a bundle-locked group and + // there are no fixups registered. + MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD); + CEIF->getContents().append(Code.begin(), Code.end()); + return; + } else { DF = new MCDataFragment(SD); if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) { -- cgit v1.1 From 83d585383345b84ae4a9590e97135f95ae39406b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 15 Jan 2013 23:43:14 +0000 Subject: Teach InstCombine to optimize extract of a value from a vector add operation with a constant zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dd7ea14..8bfcc80 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. isConstant indicates whether we're @@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } + // Extract a value from a vector add operation with a constant zero. + Value *Val = 0; Constant *Con = 0; + if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return FindScalarElement(Val, EltNo); + } + // Otherwise, we don't know. return 0; } -- cgit v1.1
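Illustration — a hand-written sketch, not part of any patch in this series — of the fold the new FindScalarElement case above enables. Given IR such as %sum = add <4 x i32> %v, <i32 0, i32 0, i32 7, i32 0> followed by %e = extractelement <4 x i32> %sum, i32 1, lane 1 of the constant is zero, so the extract can look through the add and read lane 1 of %v directly. The same lane-wise reasoning, modeled in scalar C++:

#include <cassert>
int main() {
  int v[4] = {10, 20, 30, 40};         // the unknown vector operand %v
  int c[4] = {0, 0, 7, 0};             // the constant operand of the add
  unsigned EltNo = 1;                  // the lane being extracted
  int extracted = v[EltNo] + c[EltNo]; // lane value of the add
  if (c[EltNo] == 0)                   // isNullValue() on that lane
    assert(extracted == v[EltNo]);     // so the add can be bypassed
  return 0;
}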
From 72f7bfbf0e02bb11d3e7cca1f9598c5f9d9fa2ca Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 15 Jan 2013 23:56:56 +0000 Subject: Split address information for DWARF5 split dwarf proposal. This involves using the DW_FORM_GNU_addr_index form and a separate .debug_addr section which stays in the executable and is fully linked. Sneak in two other small changes: a) Print out the debug_str_offsets.dwo section. b) Change the form we expect the entries in the debug_str_offsets.dwo section to take from ULEB128 to U32. Add tests for all of this in the fission-cu.ll test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172578 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DIE.cpp | 2 + lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 20 +++++++ lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5 ++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 85 ++++++++++++++++++++++------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 25 ++++++++- lib/DebugInfo/DWARFCompileUnit.h | 6 +- lib/DebugInfo/DWARFContext.cpp | 14 ++++- lib/DebugInfo/DWARFContext.h | 5 ++ lib/DebugInfo/DWARFFormValue.cpp | 21 ++++++- lib/DebugInfo/DWARFFormValue.h | 2 + lib/MC/MCObjectFileInfo.cpp | 3 + 11 files changed, 161 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index fecb041..0a659c2 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -198,6 +198,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: @@ -222,6 +223,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 21cceaf..5896065 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -170,6 +170,26 @@ void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, Die->addValue(Attribute, Form, Value); } +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+/// +void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, + MCSymbol *Label) { + if (!DD->useSplitDwarf()) { + if (Label != NULL) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } else { + DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } + } else { + unsigned idx = DU->getAddrPoolIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + /// addDelta - Add a label delta attribute data and value. /// void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index f210dcc..0d84ca5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -207,6 +207,11 @@ public: void addLabel(DIE *Die, unsigned Attribute, unsigned Form, const MCSymbol *Label); + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// + void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + /// addDelta - Add a label delta attribute data and value. /// void addDelta(DIE *Die, unsigned Attribute, unsigned Form, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 93106a0..1813132 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -237,6 +237,15 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { return Entry.second; } +unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { + std::pair &Entry = AddressPool[Sym]; + if (Entry.first) return Entry.second; + + Entry.second = NextAddrPoolNumber++; + Entry.first = Sym; + return Entry.second; +} + // Define a unique number for the abbreviation. 
// void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { @@ -384,10 +393,12 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, } } - SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, + Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber())); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, + Asm->GetTempSymbol("func_end", + Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); @@ -429,16 +440,16 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, return ScopeDIE; } - const MCSymbol *Start = getLabelBeforeInsn(RI->first); - const MCSymbol *End = getLabelAfterInsn(RI->second); + MCSymbol *Start = getLabelBeforeInsn(RI->first); + MCSymbol *End = getLabelAfterInsn(RI->second); if (End == 0) return 0; assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End); return ScopeDIE; } @@ -462,8 +473,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, } SmallVector::const_iterator RI = Ranges.begin(); - const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); @@ -492,10 +503,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - StartLabel); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - EndLabel); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); @@ -646,8 +655,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. - NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + // into an entity. We're using 0 (or a NULL label) for this. + NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) @@ -975,6 +984,9 @@ void DwarfDebug::endModule() { // Emit info into a debug macinfo section. emitDebugMacInfo(); + // Emit DWO addresses. + InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); + // Emit inline info. 
// TODO: When we don't need the option anymore we // can remove all of the code that this section @@ -1234,14 +1246,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // Return Label preceding the instruction. -const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { +MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { MCSymbol *Label = LabelsBeforeInsn.lookup(MI); assert(Label && "Didn't insert label before instruction"); return Label; } // Return Label immediately following the instruction. -const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { +MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } @@ -2158,7 +2170,7 @@ void DwarfUnits::emitStrings(const MCSection *StrSection, if (OffsetSection) { Asm->OutStreamer.SwitchSection(OffsetSection); unsigned offset = 0; - unsigned size = 4; + unsigned size = 4; // FIXME: DWARF64 is 8. for (unsigned i = 0, e = Entries.size(); i != e; ++i) { Asm->OutStreamer.EmitIntValue(offset, size); offset += Entries[i].second->getKeyLength() + 1; } } +// Emit addresses into an address section. +void DwarfUnits::emitAddresses(const MCSection *AddrSection) { + + if (AddressPool.empty()) return; + + // Start the dwarf addr section. + Asm->OutStreamer.SwitchSection(AddrSection); + + // Get all of the address pool entries and put them in an array by their ID so + // we can sort them. + SmallVector<std::pair<unsigned, std::pair<MCSymbol *, unsigned> *>, 64> Entries; + + for (DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> >::iterator + I = AddressPool.begin(), E = AddressPool.end(); + I != E; ++I) + Entries.push_back(std::make_pair(I->second.second, &(I->second))); + + array_pod_sort(Entries.begin(), Entries.end()); + + for (unsigned i = 0, e = Entries.size(); i != e; ++i) { + // Emit a label for reference from debug information entries. + MCSymbol *Sym = Entries[i].second->first; + if (Sym) + Asm->EmitLabelReference(Entries[i].second->first, + Asm->getDataLayout().getPointerSize()); + else + Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); + } + +} + // Emit visible names into a debug str section. void DwarfDebug::emitDebugStr() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2402,8 +2446,9 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // FIXME: We also need DW_AT_addr_base and DW_AT_dwo_id. // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. + // into an entity. We're using 0, or a NULL label for this. NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 1e471f7..9cff128 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -195,6 +195,10 @@ public: typedef StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StrPool; +// A Symbol->pair mapping of addresses used by indirect +// references. +typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool; + /// \brief Collects and handles information specific to a particular /// collection of units. class DwarfUnits { @@ -215,12 +219,17 @@ class DwarfUnits { unsigned NextStringPoolNumber; std::string StringPref; + // Collection of addresses for this unit and assorted labels.
+ AddrPool AddressPool; + unsigned NextAddrPoolNumber; + public: DwarfUnits(AsmPrinter *AP, FoldingSet *AS, std::vector *A, const char *Pref, BumpPtrAllocator &DA) : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), - StringPool(DA), NextStringPoolNumber(0), StringPref(Pref) {} + StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), + AddressPool(), NextAddrPoolNumber(0) {} /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -242,6 +251,9 @@ public: /// \brief Emit all of the strings to the section given. void emitStrings(const MCSection *, const MCSection *, const MCSymbol *); + /// \brief Emit all of the addresses to the section given. + void emitAddresses(const MCSection *); + /// \brief Returns the entry into the start of the pool. MCSymbol *getStringPoolSym(); @@ -255,6 +267,13 @@ public: /// \brief Returns the string pool. StrPool *getStringPool() { return &StringPool; } + + /// \brief Returns the index into the address pool with the given + /// label/symbol. + unsigned getAddrPoolIndex(MCSymbol *); + + /// \brief Returns the address pool. + AddrPool *getAddrPool() { return &AddressPool; } }; /// \brief Collects and handles dwarf debug information. @@ -560,7 +579,7 @@ private: } /// \brief Return Label preceding the instruction. - const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { @@ -568,7 +587,7 @@ private: } /// \brief Return Label immediately following the instruction. - const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + MCSymbol *getLabelAfterInsn(const MachineInstr *MI); public: //===--------------------------------------------------------------------===// diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index c58664f..de70b2e 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -29,6 +29,7 @@ class DWARFCompileUnit { StringRef RangeSection; StringRef StringSection; StringRef StringOffsetSection; + StringRef AddrOffsetSection; const RelocAddrMap *RelocMap; bool isLittleEndian; @@ -43,16 +44,17 @@ class DWARFCompileUnit { public: DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, - StringRef RS, StringRef SS, StringRef SOS, + StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M, bool LE) : Abbrev(DA), InfoSection(IS), AbbrevSection(AS), RangeSection(RS), StringSection(SS), StringOffsetSection(SOS), - RelocMap(M), isLittleEndian(LE) { + AddrOffsetSection(AOS), RelocMap(M), isLittleEndian(LE) { clear(); } StringRef getStringSection() const { return StringSection; } StringRef getStringOffsetSection() const { return StringOffsetSection; } + StringRef getAddrOffsetSection() const { return AddrOffsetSection; } const RelocAddrMap *getRelocMap() const { return RelocMap; } DataExtractor getDebugInfoExtractor() const; diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 247ee5b..13a527b 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -86,6 +86,14 @@ void DWARFContext::dump(raw_ostream &OS) { OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); strDWOOffset = offset; } + + OS << "\n.debug_str_offsets.dwo contents:\n"; + DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); + offset = 0; + while (offset < 
getStringOffsetDWOSection().size()) { + OS << format("0x%8.8x: ", offset); + OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + } } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -152,7 +160,8 @@ void DWARFContext::parseCompileUnits() { while (DIData.isValidOffset(offset)) { CUs.push_back(DWARFCompileUnit(getDebugAbbrev(), getInfoSection(), getAbbrevSection(), getRangeSection(), - getStringSection(), "", + getStringSection(), StringRef(), + getAddrSection(), &infoRelocMap(), isLittleEndian())); if (!CUs.back().extract(DIData, &offset)) { @@ -174,6 +183,7 @@ void DWARFContext::parseDWOCompileUnits() { getRangeDWOSection(), getStringDWOSection(), getStringOffsetDWOSection(), + getAddrSection(), &infoDWORelocMap(), isLittleEndian())); if (!DWOCUs.back().extract(DIData, &offset)) { @@ -386,6 +396,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : StringDWOSection = data; else if (name == "debug_str_offsets.dwo") StringOffsetDWOSection = data; + else if (name == "debug_addr") + AddrSection = data; // Any more debug info sections go here. else continue; diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index 687ff93..7da5c85 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -108,6 +108,7 @@ public: virtual StringRef getStringDWOSection() = 0; virtual StringRef getStringOffsetDWOSection() = 0; virtual StringRef getRangeDWOSection() = 0; + virtual StringRef getAddrSection() = 0; virtual const RelocAddrMap &infoDWORelocMap() const = 0; static bool isSupportedVersion(unsigned version) { @@ -143,6 +144,7 @@ class DWARFContextInMemory : public DWARFContext { StringRef StringDWOSection; StringRef StringOffsetDWOSection; StringRef RangeDWOSection; + StringRef AddrSection; public: DWARFContextInMemory(object::ObjectFile *); @@ -163,6 +165,9 @@ public: return StringOffsetDWOSection; } virtual StringRef getRangeDWOSection() { return RangeDWOSection; } + virtual StringRef getAddrSection() { + return AddrSection; + } virtual const RelocAddrMap &infoDWORelocMap() const { return InfoDWORelocMap; } diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index 14c6804..d1bcf96 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -325,6 +325,16 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { switch (Form) { case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_GNU_addr_index: { + StringRef AddrOffsetSec = cu->getAddrOffsetSection(); + OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue); + if (AddrOffsetSec.size() != 0) { + DataExtractor DA(AddrOffsetSec, true, cu->getAddressByteSize()); + OS << format("0x%016" PRIx64, getIndirectAddress(&DA, cu)); + } else + OS << ""; + break; + } case DW_FORM_flag_present: OS << "true"; break; case DW_FORM_flag: case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; @@ -452,10 +462,19 @@ DWARFFormValue::getIndirectCString(const DataExtractor *DS, if (!DS || !DSO) return NULL; uint32_t offset = Value.uval * 4; - uint32_t soffset = DSO->getULEB128(&offset); + uint32_t soffset = DSO->getU32(&offset); return DS->getCStr(&soffset); } +uint64_t +DWARFFormValue::getIndirectAddress(const DataExtractor *DA, + const DWARFCompileUnit *cu) const { + if (!DA) return 0; + + uint32_t offset = Value.uval * cu->getAddressByteSize(); + return DA->getAddress(&offset); +} + uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const { uint64_t die_offset = 
Value.uval; switch (Form) { diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h index 7768c18..b863001 100644 --- a/lib/DebugInfo/DWARFFormValue.h +++ b/lib/DebugInfo/DWARFFormValue.h @@ -66,6 +66,8 @@ public: const char *getAsCString(const DataExtractor *debug_str_data_ptr) const; const char *getIndirectCString(const DataExtractor *, const DataExtractor *) const; + uint64_t getIndirectAddress(const DataExtractor *, + const DWARFCompileUnit *) const; bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFCompileUnit *cu) const; static bool skipValue(uint16_t form, DataExtractor debug_info_data, diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index a304584..1f5548f 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -437,6 +437,9 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); + DwarfAddrSection = + Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); } -- cgit v1.1 From 490c7d97737ea7719efcea7321d3cfa3984b0027 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 16 Jan 2013 00:07:45 +0000 Subject: Akira, Hope you are feeling better. The Mips RDHWR (Read Hardware Register) instruction was not tested for assembler or dissassembler consumption. This patch adds that functionality. Contributer: Vladimir Medic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172579 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 5 ++++- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 16 ++++++++++++++++ lib/Target/Mips/MipsRegisterInfo.td | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 57338df..4822106 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1071,6 +1071,9 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) { MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) { + + if (!isMips64()) + return MatchOperand_NoMatch; //if the first token is not '$' we have error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1088,7 +1091,7 @@ MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) { MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, Parser.getTok().getLoc()); - op->setRegKind(MipsOperand::Kind_HWRegs); + op->setRegKind(MipsOperand::Kind_HW64Regs); Operands.push_back(op); Parser.Lex(); // Eat reg number diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 1efeffd..9560f3f 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -128,6 +128,11 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -454,6 +459,17 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void 
*Decoder) { + //Currently only hardware register 29 is supported + if (RegNo != 29) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index c6eb0e1..f93dd86 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -373,6 +373,6 @@ def HWRegsOpnd : RegisterOperand { let ParserMatchClass = HWRegsAsmOperand; } -def HW64RegsOpnd : RegisterOperand { +def HW64RegsOpnd : RegisterOperand { let ParserMatchClass = HW64RegsAsmOperand; } -- cgit v1.1 From 171192f149dce679cd520f85ffced4789448b017 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 16 Jan 2013 00:50:52 +0000 Subject: Use the ExtensionDirectiveHandler type in other places where it makes sense. Since we already have this type it's a shame to keep dragging a pair of object and method around explicitly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172584 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 9 +++------ lib/MC/MCParser/COFFAsmParser.cpp | 7 ++++--- lib/MC/MCParser/DarwinAsmParser.cpp | 7 ++++--- lib/MC/MCParser/ELFAsmParser.cpp | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 43c872b..0db3430 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -122,8 +122,6 @@ private: /// ExtensionDirectiveMap - maps directive names to handler methods in parser /// extensions. Extensions register themselves in this map by calling /// AddDirectiveHandler. - typedef std::pair - ExtensionDirectiveHandler; StringMap ExtensionDirectiveMap; /// MacroMap - Map of currently defined macros. @@ -160,10 +158,9 @@ public: virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false); - virtual void AddDirectiveHandler(MCAsmParserExtension *Object, - StringRef Directive, - DirectiveHandler Handler) { - ExtensionDirectiveMap[Directive] = std::make_pair(Object, Handler); + virtual void AddDirectiveHandler(StringRef Directive, + ExtensionDirectiveHandler Handler) { + ExtensionDirectiveMap[Directive] = Handler; } public: diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index e7c564a..d0cc0c5 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -24,10 +24,11 @@ using namespace llvm; namespace { class COFFAsmParser : public MCAsmParserExtension { - template + template void AddDirectiveHandler(StringRef Directive) { - getParser().AddDirectiveHandler(this, Directive, - HandleDirective); + MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( + this, HandleDirective); + getParser().AddDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(StringRef Section, diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 7b042df..c4974e5 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -26,10 +26,11 @@ namespace { /// \brief Implementation of directive handling which is shared across all /// Darwin targets. 
class DarwinAsmParser : public MCAsmParserExtension { - template + template void AddDirectiveHandler(StringRef Directive) { - getParser().AddDirectiveHandler(this, Directive, - HandleDirective); + MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( + this, HandleDirective); + getParser().AddDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(const char *Segment, const char *Section, diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 87126f0..eb39415 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -22,10 +22,12 @@ using namespace llvm; namespace { class ELFAsmParser : public MCAsmParserExtension { - template + template void AddDirectiveHandler(StringRef Directive) { - getParser().AddDirectiveHandler(this, Directive, - HandleDirective); + MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( + this, HandleDirective); + + getParser().AddDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(StringRef Section, unsigned Type, -- cgit v1.1 From 6b6061f01171bd3d4e32022efa45428d4f9941ee Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 16 Jan 2013 01:22:23 +0000 Subject: Define metadata interfaces for describing a static data member of a class. Emit static data member declarations and definitions through correctly. Part of PR14471. Patch by Paul Robinson! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172590 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 137 +++++++++++++++++++--------- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 6 ++ lib/IR/DIBuilder.cpp | 32 ++++++- lib/IR/DebugInfo.cpp | 2 + 4 files changed, 131 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 5896065..e98b118 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -670,18 +670,21 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { } } +/// getOrCreateContextDIE - Get context owner's DIE. +DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { + if (Context.isType()) + return getOrCreateTypeDIE(DIType(Context)); + else if (Context.isNameSpace()) + return getOrCreateNameSpace(DINameSpace(Context)); + else if (Context.isSubprogram()) + return getOrCreateSubprogramDIE(DISubprogram(Context)); + else + return getDIE(Context); +} + /// addToContextOwner - Add Die into the list of its context owner's children. 
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getDIE(Context)) + if (DIE *ContextDIE = getOrCreateContextDIE(Context)) ContextDIE->addChild(Die); else addDie(Die); @@ -925,22 +928,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { dwarf::DW_ACCESS_public); if (SP.isExplicit()) addFlag(ElemDie, dwarf::DW_AT_explicit); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, DV.getName()); - addType(ElemDie, DV.getType()); - addFlag(ElemDie, dwarf::DW_AT_declaration); - addFlag(ElemDie, dwarf::DW_AT_external); - addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { ElemDie = new DIE(dwarf::DW_TAG_friend); addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else - ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (DDTy.isStaticMember()) + ElemDie = createStaticMemberDIE(DDTy); + else + ElemDie = createMemberDIE(DDTy); } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); ElemDie = new DIE(Property.getTag()); @@ -1256,33 +1252,48 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (!GV.Verify()) return; - DIE *VariableDIE = new DIE(GV.getTag()); - // Add to map. - insertDIE(N, VariableDIE); - - // Add name. - addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); - StringRef LinkageName = GV.getLinkageName(); - bool isGlobalVariable = GV.getGlobal() != NULL; - if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // Add type. + DIDescriptor GVContext = GV.getContext(); DIType GTy = GV.getType(); - addType(VariableDIE, GTy); - // Add scoping info. - if (!GV.isLocalToUnit()) - addFlag(VariableDIE, dwarf::DW_AT_external); + // If this is a static data member definition, some attributes belong + // to the declaration DIE. + DIE *VariableDIE = NULL; + DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); + if (SDMDecl.Verify()) { + assert(SDMDecl.isStaticMember() && "Expected static member decl"); + // We need the declaration DIE that is in the static member's class. + // But that class might not exist in the DWARF yet. + // Creating the class will create the static member decl DIE. + getOrCreateContextDIE(SDMDecl.getContext()); + VariableDIE = getDIE(SDMDecl); + assert(VariableDIE && "Static member decl has no context?"); + } + + // If this is not a static data member definition, create the variable + // DIE and add the initial set of attributes to it. + if (!VariableDIE) { + VariableDIE = new DIE(GV.getTag()); + // Add to map. + insertDIE(N, VariableDIE); + + // Add name and type. + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addType(VariableDIE, GTy); + + // Add scoping info. + if (!GV.isLocalToUnit()) + addFlag(VariableDIE, dwarf::DW_AT_external); + + // Add line number info. + addSourceLine(VariableDIE, GV); + // Add to context owner. 
+ addToContextOwner(VariableDIE, GVContext); + } - // Add line number info. - addSourceLine(VariableDIE, GV); - // Add to context owner. - DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); // Add location. bool addToAccelTable = false; DIE *VariableSpecDIE = NULL; + bool isGlobalVariable = GV.getGlobal() != NULL; if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -1298,11 +1309,18 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addFlag(VariableDIE, dwarf::DW_AT_declaration); + // A static member's declaration is already flagged as such. + if (!SDMDecl.Verify()) + addFlag(VariableDIE, dwarf::DW_AT_declaration); addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + // Add linkage name. + StringRef LinkageName = GV.getLinkageName(); + if (!LinkageName.empty() && isGlobalVariable) + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = dyn_cast_or_null(GV.getConstant())) addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); @@ -1638,3 +1656,36 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { } return MemberDie; } + +/// createStaticMemberDIE - Create new DIE for C++ static member. +DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { + if (!DT.Verify()) + return NULL; + + DIE *StaticMemberDIE = new DIE(DT.getTag()); + DIType Ty = DT.getTypeDerivedFrom(); + + addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); + addType(StaticMemberDIE, Ty); + addSourceLine(StaticMemberDIE, DT); + addFlag(StaticMemberDIE, dwarf::DW_AT_external); + addFlag(StaticMemberDIE, dwarf::DW_AT_declaration); + + // FIXME: We could omit private if the parent is a class_type, and + // public if the parent is something else. + if (DT.isProtected()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (DT.isPrivate()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (const ConstantInt *CI = dyn_cast_or_null(DT.getConstant())) + addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + + insertDIE(DT, StaticMemberDIE); + return StaticMemberDIE; +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 0d84ca5..e576ff2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -86,6 +86,9 @@ class CompileUnit { /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIDescriptor Context); + public: CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *); @@ -344,6 +347,9 @@ public: /// createMemberDIE - Create new member DIE. DIE *createMemberDIE(DIDerivedType DT); + /// createStaticMemberDIE - Create new static data member DIE. + DIE *createStaticMemberDIE(DIDerivedType DT); + private: // DIEValueAllocator - All DIEValues are allocated through this allocator. 
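Illustration — a hand-written example, not part of the patch — of source that exercises the new static-member path. Per the createStaticMemberDIE and createGlobalVariableDIE code above, the in-class declaration becomes a DW_TAG_member child of the class DIE carrying DW_AT_declaration and DW_AT_external, and the out-of-class definition refers back to it through DW_AT_specification:

// Declaration: described via DIBuilder::createStaticMemberType.
struct Widget {
  static int Count;
};
// Definition: described via DIBuilder::createStaticVariable(..., Decl),
// where Decl is the declaration node created for the member above.
int Widget::Count = 0;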
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index bd7f0e3..0d106ac 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -350,6 +350,30 @@ DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, return DIType(MDNode::get(VMContext, Elts)); } +/// createStaticMemberType - Create debugging information entry for a +/// C++ static data member. +DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + DIType Ty, unsigned Flags, + llvm::Value *Val) { + // TAG_member is encoded in DIDerivedType format. + Flags |= DIDescriptor::FlagStaticMember; + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + getNonCompileUnitScope(Scope), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/), + ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/), + ConstantInt::get(Type::getInt64Ty(VMContext), 0/*OffsetInBits*/), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + Val + }; + return DIType(MDNode::get(VMContext, Elts)); +} + /// createObjCIVar - Create debugging information entry for Objective-C /// instance variable. DIType DIBuilder::createObjCIVar(StringRef Name, @@ -787,7 +811,8 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, Ty, ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ - Val + Val, + DIDescriptor() }; MDNode *Node = MDNode::get(VMContext, Elts); AllGVs.push_back(Node); @@ -799,7 +824,7 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, DIGlobalVariable DIBuilder:: createStaticVariable(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, Value *Val) { + DIType Ty, bool isLocalToUnit, Value *Val, MDNode *Decl) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -812,7 +837,8 @@ createStaticVariable(DIDescriptor Context, StringRef Name, Ty, ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ - Val + Val, + DIDescriptor(Decl) }; MDNode *Node = MDNode::get(VMContext, Elts); AllGVs.push_back(Node); diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 7083495..876aff8 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -1101,6 +1101,8 @@ void DIType::printInternal(raw_ostream &OS) const { OS << " [fwd]"; if (isVector()) OS << " [vector]"; + if (isStaticMember()) + OS << " [static]"; } void DIDerivedType::printInternal(raw_ostream &OS) const { -- cgit v1.1 From 457ee1a12e2c52624af7fdb81cf938f6d8d96572 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 16 Jan 2013 01:29:10 +0000 Subject: reverting 172579 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172594 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 5 +---- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 16 ---------------- lib/Target/Mips/MipsRegisterInfo.td | 2 +- 3 files changed, 2 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 4822106..57338df 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1071,9 +1071,6 
@@ MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) { MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) { - - if (!isMips64()) - return MatchOperand_NoMatch; //if the first token is not '$' we have error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1091,7 +1088,7 @@ MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) { MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, Parser.getTok().getLoc()); - op->setRegKind(MipsOperand::Kind_HW64Regs); + op->setRegKind(MipsOperand::Kind_HWRegs); Operands.push_back(op); Parser.Lex(); // Eat reg number diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 9560f3f..1efeffd 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -128,11 +128,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -459,17 +454,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - //Currently only hardware register 29 is supported - if (RegNo != 29) - return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f93dd86..c6eb0e1 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -373,6 +373,6 @@ def HWRegsOpnd : RegisterOperand { let ParserMatchClass = HWRegsAsmOperand; } -def HW64RegsOpnd : RegisterOperand { +def HW64RegsOpnd : RegisterOperand { let ParserMatchClass = HW64RegsAsmOperand; } -- cgit v1.1 From 09c84f13c6662e680f86e16f2474adf6177cf52f Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 16 Jan 2013 06:32:39 +0000 Subject: [ObjCARC] Turn off ignoring unwind edges in ObjCARC when -fno-objc-arc-exceptions is enabled, due to its effect on correctness. Specifically, the ARC semantics of -fno-objc-arc-exceptions state only that the unwind path out of a call *MAY* not release objects. Thus we can have a situation where a release gets moved into a catch block that we then ignore when we remove a retain/release pair, resulting in the cleanup code path potentially blowing up before program exit (even though we assume the program is exiting anyway).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172599 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index abd6b41..8da8726 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -2675,17 +2675,6 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, PtrState &S = I->second; succ_const_iterator SI(TI), SE(TI, false); - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) { - DEBUG(dbgs() << "ObjCARCOpt::CheckForCFGHazards: Found an invoke " - "terminator marked with " - "clang.arc.no_objc_arc_exceptions. Ignoring unwind " - "edge.\n"); - --SE; - } - for (; SI != SE; ++SI) { Sequence SuccSSeq = S_None; bool SuccSRRIKnownSafe = false; @@ -2734,17 +2723,6 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, PtrState &S = I->second; succ_const_iterator SI(TI), SE(TI, false); - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) { - DEBUG(dbgs() << "ObjCARCOpt::CheckForCFGHazards: Found an invoke " - "terminator marked with " - "clang.arc.no_objc_arc_exceptions. Ignoring unwind " - "edge.\n"); - --SE; - } - for (; SI != SE; ++SI) { Sequence SuccSSeq = S_None; bool SuccSRRIKnownSafe = false; @@ -3199,17 +3177,6 @@ ComputePostOrders(Function &F, TerminatorInst *TI = cast(&CurrBB->back()); succ_iterator SE(TI, false); - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) { - DEBUG(dbgs() << "ObjCARCOpt::ComputePostOrders: Found an invoke " - "terminator marked with " - "clang.arc.no_objc_arc_exceptions. Ignoring unwind " - "edge.\n"); - --SE; - } - while (SuccStack.back().second != SE) { BasicBlock *SuccBB = *SuccStack.back().second++; if (Visited.insert(SuccBB)) { -- cgit v1.1 From 19cd7e9ce28ed7f3326ebcd386eec215ab3763f9 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 16 Jan 2013 13:23:28 +0000 Subject: ASan: wrap mapping scale and offset in a struct and make it a member of ASan passes. Add test for non-default mapping scale and offset. 
No functionality change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172610 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 129 +++++++++++++-------- 1 file changed, 78 insertions(+), 51 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 9bd3239..25ca165 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -53,7 +53,7 @@ using namespace llvm; static const uint64_t kDefaultShadowScale = 3; static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; -static const uint64_t kDefaultShadowOffsetAndroid = 0; +static const uint64_t kDefaultShadowOffsetPie = 0; static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; @@ -186,14 +186,38 @@ class SetOfDynamicallyInitializedGlobals { SmallSet DynInitGlobals; }; -static int MappingScale() { - return ClMappingScale ? ClMappingScale : kDefaultShadowScale; +/// This struct defines the shadow mapping using the rule: +/// shadow = (mem >> Scale) + Offset. +struct ShadowMapping { + int Scale; + uint64_t Offset; +}; + +static ShadowMapping getShadowMapping(const Module &M, int LongSize) { + llvm::Triple targetTriple(M.getTargetTriple()); + bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; + + ShadowMapping Mapping; + + Mapping.Offset = isAndroid ? kDefaultShadowOffsetPie : + (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); + if (ClMappingOffsetLog >= 0) { + // Zero offset log is the special case. + Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog; + } + + Mapping.Scale = kDefaultShadowScale; + if (ClMappingScale) { + Mapping.Scale = ClMappingScale; + } + + return Mapping; } -static size_t RedzoneSize() { +static size_t RedzoneSizeForScale(int MappingScale) { // Redzone used for stack and globals is at least 32 bytes. // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. - return std::max(32U, 1U << MappingScale()); + return std::max(32U, 1U << MappingScale); } /// AddressSanitizer: instrument the code in module to find memory bugs. 
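// Editorial worked example, not part of the patch: with the 64-bit defaults
// Scale = 3 and Offset = 1ULL << 44, the rule shadow = (mem >> Scale) + Offset
// maps every 8 application bytes to 1 shadow byte. memToShadow() below uses
// OR rather than ADD, which agrees whenever the shifted address and the
// offset have no set bits in common (true for this sample address):
#include <cassert>
#include <cstdint>
int main() {
  const uint64_t Scale = 3, Offset = 1ULL << 44;
  const uint64_t Mem = 0x7f0000001000ULL;      // hypothetical address
  const uint64_t Shadow = (Mem >> Scale) | Offset;
  assert(Shadow == ((Mem >> Scale) + Offset)); // OR == ADD here
  assert(Shadow == 0x1fe000000200ULL);
  return 0;
}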
@@ -227,6 +251,7 @@ struct AddressSanitizer : public FunctionPass { void createInitializerPoisonCalls(Module &M, Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); + void emitShadowMapping(Module &M, IRBuilder<> &IRB) const; virtual bool doInitialization(Module &M); static char ID; // Pass identification, replacement for typeid @@ -242,9 +267,9 @@ struct AddressSanitizer : public FunctionPass { bool CheckLifetime; LLVMContext *C; DataLayout *TD; - uint64_t MappingOffset; int LongSize; Type *IntptrTy; + ShadowMapping Mapping; Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; @@ -278,6 +303,9 @@ class AddressSanitizerModule : public ModulePass { bool ShouldInstrumentGlobal(GlobalVariable *G); void createInitializerPoisonCalls(Module &M, Value *FirstAddr, Value *LastAddr); + size_t RedzoneSize() const { + return RedzoneSizeForScale(Mapping.Scale); + } bool CheckInitOrder; SmallString<64> BlacklistFile; @@ -286,6 +314,7 @@ class AddressSanitizerModule : public ModulePass { Type *IntptrTy; LLVMContext *C; DataLayout *TD; + ShadowMapping Mapping; Function *AsanPoisonGlobals; Function *AsanUnpoisonGlobals; Function *AsanRegisterGlobals; @@ -308,6 +337,7 @@ struct FunctionStackPoisoner : public InstVisitor { LLVMContext *C; Type *IntptrTy; Type *IntptrPtrTy; + ShadowMapping Mapping; SmallVector AllocaVec; SmallVector RetVec; @@ -332,7 +362,8 @@ struct FunctionStackPoisoner : public InstVisitor { FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) : F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), - TotalStackSize(0), StackAlignment(1 << MappingScale()) {} + Mapping(ASan.Mapping), + TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {} bool runOnFunction() { if (!ClStack) return false; @@ -411,6 +442,9 @@ struct FunctionStackPoisoner : public InstVisitor { AI.getAllocatedType()->isSized()); } + size_t RedzoneSize() const { + return RedzoneSizeForScale(Mapping.Scale); + } uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty); @@ -473,12 +507,12 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale - Shadow = IRB.CreateLShr(Shadow, MappingScale()); - if (MappingOffset == 0) + Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); + if (Mapping.Offset == 0) return Shadow; // (Shadow >> scale) | offset return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, - MappingOffset)); + Mapping.Offset)); } void AddressSanitizer::instrumentMemIntrinsicParam( @@ -614,7 +648,7 @@ Instruction *AddressSanitizer::generateCrashCode( Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize) { - size_t Granularity = 1 << MappingScale(); + size_t Granularity = 1 << Mapping.Scale; // Addr & (Granularity - 1) Value *LastAccessedByte = IRB.CreateAnd( AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); @@ -635,7 +669,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Type *ShadowTy = IntegerType::get( - *C, std::max(8U, TypeSize >> MappingScale())); + *C, std::max(8U, TypeSize >> Mapping.Scale)); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); @@ -644,7 +678,7 @@ void 
AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - size_t Granularity = 1 << MappingScale(); + size_t Granularity = 1 << Mapping.Scale; TerminatorInst *CrashTerm = 0; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { @@ -782,7 +816,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) { BL.reset(new BlackList(BlacklistFile)); if (BL->isIn(M)) return false; C = &(M.getContext()); - IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits()); + int LongSize = TD->getPointerSizeInBits(); + IntptrTy = Type::getIntNTy(*C, LongSize); + Mapping = getShadowMapping(M, LongSize); initializeCallbacks(M); DynamicallyInitializedGlobals.Init(M); @@ -930,6 +966,28 @@ void AddressSanitizer::initializeCallbacks(Module &M) { /*hasSideEffects=*/true); } +void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const { + // Tell the values of mapping offset and scale to the run-time if they are + // specified by command-line flags. + if (ClMappingOffsetLog >= 0) { + GlobalValue *asan_mapping_offset = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, Mapping.Offset), + kAsanMappingOffsetName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_offset, true); + } + + if (ClMappingScale) { + GlobalValue *asan_mapping_scale = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, Mapping.Scale), + kAsanMappingScaleName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_scale, true); + } +} + // virtual bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. @@ -955,41 +1013,10 @@ bool AddressSanitizer::doInitialization(Module &M) { AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); - llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; - - MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid : - (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); - if (ClMappingOffsetLog >= 0) { - if (ClMappingOffsetLog == 0) { - // special case - MappingOffset = 0; - } else { - MappingOffset = 1ULL << ClMappingOffsetLog; - } - } - - - if (ClMappingOffsetLog >= 0) { - // Tell the run-time the current values of mapping offset and scale. - GlobalValue *asan_mapping_offset = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingOffset), - kAsanMappingOffsetName); - // Read the global, otherwise it may be optimized away. - IRB.CreateLoad(asan_mapping_offset, true); - } - if (ClMappingScale) { - GlobalValue *asan_mapping_scale = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingScale()), - kAsanMappingScaleName); - // Read the global, otherwise it may be optimized away. 
- IRB.CreateLoad(asan_mapping_scale, true); - } + Mapping = getShadowMapping(M, LongSize); + emitShadowMapping(M, IRB); appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); - return true; } @@ -1147,7 +1174,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { void FunctionStackPoisoner::poisonRedZones( const ArrayRef &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison) { - size_t ShadowRZSize = RedzoneSize() >> MappingScale(); + size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale; assert(ShadowRZSize >= 1 && ShadowRZSize <= 4); Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8); Type *RZPtrTy = PointerType::get(RZTy, 0); @@ -1178,13 +1205,13 @@ void FunctionStackPoisoner::poisonRedZones( // Poison the partial redzone at right Ptr = IRB.CreateAdd( ShadowBase, ConstantInt::get(IntptrTy, - (Pos >> MappingScale()) - ShadowRZSize)); + (Pos >> Mapping.Scale) - ShadowRZSize)); size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes); uint32_t Poison = 0; if (DoPoison) { PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes, RedzoneSize(), - 1ULL << MappingScale(), + 1ULL << Mapping.Scale, kAsanStackPartialRedzoneMagic); } Value *PartialPoison = ConstantInt::get(RZTy, Poison); @@ -1193,7 +1220,7 @@ void FunctionStackPoisoner::poisonRedZones( // Poison the full redzone at right. Ptr = IRB.CreateAdd(ShadowBase, - ConstantInt::get(IntptrTy, Pos >> MappingScale())); + ConstantInt::get(IntptrTy, Pos >> Mapping.Scale)); bool LastAlloca = (i == AllocaVec.size() - 1); Value *Poison = LastAlloca ? PoisonRight : PoisonMid; IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); -- cgit v1.1 From 655578f8b5275e7c59b87d4709b0d56b2621caac Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 16 Jan 2013 14:41:46 +0000 Subject: Allow vectors in CreatePointerCast of constants. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172615 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Constants.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 9327554..e984aac 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -1465,10 +1465,11 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) { } Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) { - assert(S->getType()->isPointerTy() && "Invalid cast"); - assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); + assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); + assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) && + "Invalid cast"); - if (Ty->isIntegerTy()) + if (Ty->isIntOrIntVectorTy()) return getPtrToInt(S, Ty); return getBitCast(S, Ty); } -- cgit v1.1 From fbb662f840c2f76988ff9f3f152695632cfc71be Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 16 Jan 2013 17:27:22 +0000 Subject: Introduce llvm::sys::getProcessTriple() function. In r143502, we renamed getHostTriple() to getDefaultTargetTriple() as part of work to allow the user to supply a different default target triple at configure time. This change also affected the JIT. However, it is inappropriate to use the default target triple in the JIT in most circumstances because this will not necessarily match the current architecture used by the process, leading to illegal instruction and other such errors at run time. Introduce the getProcessTriple() function for use in the JIT and its clients, and cause the JIT to use it. 
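For illustration only (this snippet is not part of the patch), a client that wants the same behavior as the JIT would now select its triple like this:

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/Host.h"

    // Describe the architecture this process actually executes, which can
    // differ from the configure-time default target on multilib systems.
    llvm::Triple ProcTriple(llvm::sys::getProcessTriple());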
On architectures with a single bitness, the host and process triples are
identical. On other architectures, the host triple represents the architecture
of the host CPU, while the process triple represents the architecture used by
the host CPU to interpret machine code within the current process. For example,
when executing 32-bit code on a 64-bit Linux machine, the host triple may be
'x86_64-unknown-linux-gnu', while the process triple may be
'i386-unknown-linux-gnu'.

This fixes JIT for the 32-on-64-bit (and vice versa) build on non-Apple
platforms.

Differential Revision: http://llvm-reviews.chandlerc.com/D254

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172627 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/ExecutionEngine/TargetSelect.cpp | 15 ++-------------
 lib/Support/Host.cpp                 | 12 ++++++++++++
 2 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'lib')

diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 3c4da75..ca4330f 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -32,18 +32,7 @@ TargetMachine *EngineBuilder::selectTarget() {
   // must use the host architecture.
   if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M)
     TT.setTriple(M->getTargetTriple());
-  else {
-    TT.setTriple(LLVM_HOSTTRIPLE);
-#if defined(__APPLE__)
-#if defined(__LP64__)
-    if (TT.isArch32Bit())
-      TT = TT.get64BitArchVariant();
-#else
-    if (TT.isArch64Bit())
-      TT = TT.get32BitArchVariant();
-#endif
-#endif // APPLE
-  }
+
   return selectTarget(TT, MArch, MCPU, MAttrs);
 }
 
@@ -55,7 +44,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
                               const SmallVectorImpl& MAttrs) {
   Triple TheTriple(TargetTriple);
   if (TheTriple.getTriple().empty())
-    TheTriple.setTriple(sys::getDefaultTargetTriple());
+    TheTriple.setTriple(sys::getProcessTriple());
 
   // Adjust the triple to match what the user requested.
   const Target *TheTarget = 0;
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 5ad5308..afbf0bf 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/DataStream.h"
 #include "llvm/Support/Debug.h"
@@ -578,3 +579,14 @@ bool sys::getHostCPUFeatures(StringMap &Features){
   return false;
 }
 #endif
+
+std::string sys::getProcessTriple() {
+  Triple PT(LLVM_HOSTTRIPLE);
+
+  if (sizeof(void *) == 8 && PT.isArch32Bit())
+    PT = PT.get64BitArchVariant();
+  if (sizeof(void *) == 4 && PT.isArch64Bit())
+    PT = PT.get32BitArchVariant();
+
+  return PT.str();
+}
--
cgit v1.1

From 75c9b9384f50e9387f24dd7ce6af403cbda6d19a Mon Sep 17 00:00:00 2001
From: Kevin Enderby
Date: Wed, 16 Jan 2013 17:46:23 +0000
Subject: We want the dwarf AT_producer for assembly source files to match
 clang's AT_producer, which includes clang's version information so we can
 tell which version of the compiler was used.

This is the first of two steps to allow us to do that. This is the llvm-mc
change to provide a method to set the AT_producer string. The second step,
coming soon to a clang near you, will have the clang driver pass the value of
getClangFullVersion() via a flag when invoking the integrated assembler on
assembly source files.
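Concretely, the second step would amount to something like this in the driver (a sketch; it assumes the setter MCContext::setDwarfDebugProducer that pairs with the getDwarfDebugProducer() accessor used in the diff below):

    // Make DW_AT_producer carry the driver's version string instead of the
    // generic llvm-mc banner.
    std::string Producer = getClangFullVersion(); // must outlive Ctx's use
    Ctx.setDwarfDebugProducer(Producer);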
rdar://12955296 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172630 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 5691822..74851ce 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -638,9 +638,15 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, } // AT_producer, the version of the assembler tool. - MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM ")); - MCOS->EmitBytes(StringRef(PACKAGE_VERSION)); - MCOS->EmitBytes(StringRef(")")); + StringRef DwarfDebugProducer = context.getDwarfDebugProducer(); + if (!DwarfDebugProducer.empty()){ + MCOS->EmitBytes(DwarfDebugProducer); + } + else { + MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM ")); + MCOS->EmitBytes(StringRef(PACKAGE_VERSION)); + MCOS->EmitBytes(StringRef(")")); + } MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_language, a 4 byte value. We use DW_LANG_Mips_Assembler as the dwarf2 -- cgit v1.1 From 1e08165ae9ee3a7b654d9030bc5c86333fe6673f Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Wed, 16 Jan 2013 18:39:23 +0000 Subject: [Linker] Change module flag linking to be more extensible. - Instead of computing a bunch of buckets of different flag types, just do an incremental link resolving conflicts as they arise. - This also has the advantage of making the link result deterministic and not dependent on map iteration order. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 230 +++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 133 deletions(-) (limited to 'lib') diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index e34dbcb..1b4ef32 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -421,13 +421,6 @@ namespace { } void computeTypeMapping(); - bool categorizeModuleFlagNodes(const NamedMDNode *ModFlags, - DenseMap &ErrorNode, - DenseMap &WarningNode, - DenseMap &OverrideNode, - DenseMap > &RequireNodes, - SmallSetVector &SeenIDs); bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV); bool linkGlobalProto(GlobalVariable *SrcGV); @@ -987,67 +980,16 @@ void ModuleLinker::linkNamedMDNodes() { } } -/// categorizeModuleFlagNodes - Categorize the module flags according to their -/// type: Error, Warning, Override, and Require. 
-bool ModuleLinker:: -categorizeModuleFlagNodes(const NamedMDNode *ModFlags, - DenseMap &ErrorNode, - DenseMap &WarningNode, - DenseMap &OverrideNode, - DenseMap > &RequireNodes, - SmallSetVector &SeenIDs) { - bool HasErr = false; - - for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { - MDNode *Op = ModFlags->getOperand(I); - ConstantInt *Behavior = cast(Op->getOperand(0)); - MDString *ID = cast(Op->getOperand(1)); - Value *Val = Op->getOperand(2); - switch (Behavior->getZExtValue()) { - case Module::Error: { - MDNode *&ErrNode = ErrorNode[ID]; - if (!ErrNode) ErrNode = Op; - if (ErrNode->getOperand(2) != Val) - HasErr = emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - break; - } - case Module::Warning: { - MDNode *&WarnNode = WarningNode[ID]; - if (!WarnNode) WarnNode = Op; - if (WarnNode->getOperand(2) != Val) - errs() << "WARNING: linking module flags '" << ID->getString() - << "': IDs have conflicting values"; - break; - } - case Module::Require: RequireNodes[ID].insert(Op); break; - case Module::Override: { - MDNode *&OvrNode = OverrideNode[ID]; - if (!OvrNode) OvrNode = Op; - if (OvrNode->getOperand(2) != Val) - HasErr = emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting override values"); - break; - } - } - - SeenIDs.insert(ID); - } - - return HasErr; -} - /// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest /// module. bool ModuleLinker::linkModuleFlagsMetadata() { + // If the source module has no module flags, we are done. const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); if (!SrcModFlags) return false; - NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata(); - // If the destination module doesn't have module flags yet, then just copy // over the source module's flags. + NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata(); if (DstModFlags->getNumOperands() == 0) { for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) DstModFlags->addOperand(SrcModFlags->getOperand(I)); @@ -1055,87 +997,109 @@ bool ModuleLinker::linkModuleFlagsMetadata() { return false; } - bool HasErr = false; + // First build a map of the existing module flags and requirements. + DenseMap Flags; + SmallSetVector Requirements; + for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { + MDNode *Op = DstModFlags->getOperand(I); + ConstantInt *Behavior = cast(Op->getOperand(0)); + MDString *ID = cast(Op->getOperand(1)); - // Otherwise, we have to merge them based on their behaviors. First, - // categorize all of the nodes in the modules' module flags. If an error or - // warning occurs, then emit the appropriate message(s). - DenseMap ErrorNode; - DenseMap WarningNode; - DenseMap OverrideNode; - DenseMap > RequireNodes; - SmallSetVector SeenIDs; - - HasErr |= categorizeModuleFlagNodes(SrcModFlags, ErrorNode, WarningNode, - OverrideNode, RequireNodes, SeenIDs); - HasErr |= categorizeModuleFlagNodes(DstModFlags, ErrorNode, WarningNode, - OverrideNode, RequireNodes, SeenIDs); - - // Check that there isn't both an error and warning node for a flag. 
- for (SmallSetVector::iterator - I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) { - MDString *ID = *I; - if (ErrorNode[ID] && WarningNode[ID]) - HasErr = emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting behaviors"); + if (Behavior->getZExtValue() == Module::Require) { + Requirements.insert(cast(Op->getOperand(2))); + } else { + Flags[ID] = Op; + } } - // Early exit if we had an error. - if (HasErr) return true; - - // Get the destination's module flags ready for new operands. - DstModFlags->dropAllReferences(); - - // Add all of the module flags to the destination module. - DenseMap > AddedNodes; - for (SmallSetVector::iterator - I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) { - MDString *ID = *I; - if (OverrideNode[ID]) { - DstModFlags->addOperand(OverrideNode[ID]); - AddedNodes[ID].push_back(OverrideNode[ID]); - } else if (ErrorNode[ID]) { - DstModFlags->addOperand(ErrorNode[ID]); - AddedNodes[ID].push_back(ErrorNode[ID]); - } else if (WarningNode[ID]) { - DstModFlags->addOperand(WarningNode[ID]); - AddedNodes[ID].push_back(WarningNode[ID]); + // Merge in the flags from the source module, and also collect its set of + // requirements. + bool HasErr = false; + for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { + MDNode *SrcOp = SrcModFlags->getOperand(I); + ConstantInt *SrcBehavior = cast(SrcOp->getOperand(0)); + MDString *ID = cast(SrcOp->getOperand(1)); + MDNode *DstOp = Flags.lookup(ID); + unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); + + // If this is a requirement, add it and continue. + if (SrcBehaviorValue == Module::Require) { + // If the destination module does not already have this requirement, add + // it. + if (Requirements.insert(cast(SrcOp->getOperand(2)))) { + DstModFlags->addOperand(SrcOp); + } + continue; } - for (SmallSetVector::iterator - II = RequireNodes[ID].begin(), IE = RequireNodes[ID].end(); - II != IE; ++II) - DstModFlags->addOperand(*II); - } + // If there is no existing flag with this ID, just add it. + if (!DstOp) { + Flags[ID] = SrcOp; + DstModFlags->addOperand(SrcOp); + continue; + } - // Now check that all of the requirements have been satisfied. - for (SmallSetVector::iterator - I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) { - MDString *ID = *I; - SmallSetVector &Set = RequireNodes[ID]; - - for (SmallSetVector::iterator - II = Set.begin(), IE = Set.end(); II != IE; ++II) { - MDNode *Node = *II; - MDNode *Val = cast(Node->getOperand(2)); - - MDString *ReqID = cast(Val->getOperand(0)); - Value *ReqVal = Val->getOperand(1); - - bool HasValue = false; - for (SmallVectorImpl::iterator - RI = AddedNodes[ReqID].begin(), RE = AddedNodes[ReqID].end(); - RI != RE; ++RI) { - MDNode *ReqNode = *RI; - if (ReqNode->getOperand(2) == ReqVal) { - HasValue = true; - break; - } + // Otherwise, perform a merge. + ConstantInt *DstBehavior = cast(DstOp->getOperand(0)); + unsigned DstBehaviorValue = DstBehavior->getZExtValue(); + + // If either flag has override behavior, handle it first. + if (DstBehaviorValue == Module::Override) { + // Diagnose inconsistent flags which both have override behavior. + if (SrcBehaviorValue == Module::Override && + SrcOp->getOperand(2) != DstOp->getOperand(2)) { + HasErr |= emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting override values"); } + continue; + } else if (SrcBehaviorValue == Module::Override) { + // Update the destination flag to that of the source. 
+ DstOp->replaceOperandWith(0, SrcBehavior); + DstOp->replaceOperandWith(2, SrcOp->getOperand(2)); + continue; + } - if (!HasValue) - HasErr = emitError("linking module flags '" + ReqID->getString() + - "': does not have the required value"); + // Diagnose inconsistent merge behavior types. + if (SrcBehaviorValue != DstBehaviorValue) { + HasErr |= emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting behaviors"); + continue; + } + + // Perform the merge for standard behavior types. + switch (SrcBehaviorValue) { + case Module::Require: + case Module::Override: assert(0 && "not possible"); break; + case Module::Error: { + // Emit an error if the values differ. + if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { + HasErr |= emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting values"); + } + continue; + } + case Module::Warning: { + // Emit a warning if the values differ. + if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { + errs() << "WARNING: linking module flags '" << ID->getString() + << "': IDs have conflicting values"; + } + continue; + } + } + } + + // Check all of the requirements. + for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { + MDNode *Requirement = Requirements[I]; + MDString *Flag = cast(Requirement->getOperand(0)); + Value *ReqValue = Requirement->getOperand(1); + + MDNode *Op = Flags[Flag]; + if (!Op || Op->getOperand(2) != ReqValue) { + HasErr |= emitError("linking module flags '" + Flag->getString() + + "': does not have the required value"); + continue; } } -- cgit v1.1 From f9f40bd158942f8cd1c3ed62106280f36a022cde Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 16 Jan 2013 18:56:50 +0000 Subject: Now that GenericAsmParser was folded into AsmParser, some methods and types can return into the safe harbor of AsmParser's private areas. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172637 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 74 +++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 0db3430..838a554 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -50,6 +50,25 @@ MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {} namespace { +/// \brief Helper types for tracking macro definitions. +typedef std::vector MCAsmMacroArgument; +typedef std::vector MCAsmMacroArguments; +typedef std::pair MCAsmMacroParameter; +typedef std::vector MCAsmMacroParameters; + +struct MCAsmMacro { + StringRef Name; + StringRef Body; + MCAsmMacroParameters Parameters; + +public: + MCAsmMacro(StringRef N, StringRef B, const MCAsmMacroParameters &P) : + Name(N), Body(B), Parameters(P) {} + + MCAsmMacro(const MCAsmMacro& Other) + : Name(Other.Name), Body(Other.Body), Parameters(Other.Parameters) {} +}; + /// \brief Helper class for storing information about an active macro /// instantiation. struct MacroInstantiation { @@ -73,7 +92,6 @@ public: MemoryBuffer *I); }; -//struct AsmRewrite; struct ParseStatementInfo { /// ParsedOperands - The parsed operands from the last parsed statement. 
SmallVector ParsedOperands; @@ -205,25 +223,11 @@ public: virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool ParseAbsoluteExpression(int64_t &Res); - bool ParseMacroArgument(MCAsmMacroArgument &MA, - AsmToken::TokenKind &ArgumentDelimiter); - /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. virtual bool ParseIdentifier(StringRef &Res); virtual void EatToEndOfStatement(); - virtual bool MacrosEnabled() {return MacrosEnabledFlag;} - virtual void SetMacrosEnabled(bool flag) {MacrosEnabledFlag = flag;} - - virtual const MCAsmMacro* LookupMacro(StringRef Name); - virtual void DefineMacro(StringRef Name, const MCAsmMacro& Macro); - virtual void UndefineMacro(StringRef Name); - - virtual bool InsideMacroInstantiation() {return !ActiveMacros.empty();} - virtual bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); - void HandleMacroExit(); - virtual void CheckForValidSection(); /// } @@ -238,6 +242,44 @@ private: const MCAsmMacroArguments &A, const SMLoc &L); + /// \brief Are macros enabled in the parser? + bool MacrosEnabled() {return MacrosEnabledFlag;} + + /// \brief Control a flag in the parser that enables or disables macros. + void SetMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;} + + /// \brief Lookup a previously defined macro. + /// \param Name Macro name. + /// \returns Pointer to macro. NULL if no such macro was defined. + const MCAsmMacro* LookupMacro(StringRef Name); + + /// \brief Define a new macro with the given name and information. + void DefineMacro(StringRef Name, const MCAsmMacro& Macro); + + /// \brief Undefine a macro. If no such macro was defined, it's a no-op. + void UndefineMacro(StringRef Name); + + /// \brief Are we inside a macro instantiation? + bool InsideMacroInstantiation() {return !ActiveMacros.empty();} + + /// \brief Handle entry to macro instantiation. + /// + /// \param M The macro. + /// \param NameLoc Instantiation location. + bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); + + /// \brief Handle exit from macro instantiation. + void HandleMacroExit(); + + /// \brief Extract AsmTokens for a macro argument. If the argument delimiter + /// is initially unknown, set it to AsmToken::Eof. It will be set to the + /// correct delimiter by the method. + bool ParseMacroArgument(MCAsmMacroArgument &MA, + AsmToken::TokenKind &ArgumentDelimiter); + + /// \brief Parse all macro arguments for a given macro. + bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); + void PrintMacroInstantiations(); void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef Ranges = ArrayRef()) const { @@ -259,8 +301,6 @@ private: /// location. void JumpToLoc(SMLoc Loc, int InBuffer=-1); - bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); - /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. -- cgit v1.1 From ed5df01198a24e3c39930468f47fc2b560fc3684 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 16 Jan 2013 19:32:36 +0000 Subject: Some small (and mostly cosmetic) fixes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 838a554..f9ed63a 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1113,8 +1113,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); IDVal = ""; - } - else { + } else { IDVal = getTok().getString(); Lex(); // Consume the integer token to be used as an identifier token. if (Lexer.getKind() != AsmToken::Colon) { @@ -1122,12 +1121,10 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return TokError("unexpected token at start of statement"); } } - } else if (Lexer.is(AsmToken::Dot)) { // Treat '.' as a valid identifier in this context. Lex(); IDVal = "."; - } else if (ParseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); @@ -1168,7 +1165,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return ParseDirectiveEndIf(IDLoc); } - // If we are in a ".if 0" block, ignore this statement. + // Ignore the statement if in the middle of inactive conditional + // (e.g. ".if 0"). if (TheCondState.Ignore) { EatToEndOfStatement(); return false; @@ -1451,13 +1449,10 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { CheckForValidSection(); // Canonicalize the opcode to lower case. - SmallString<128> OpcodeStr; - for (unsigned i = 0, e = IDVal.size(); i != e; ++i) - OpcodeStr.push_back(tolower(IDVal[i])); - + std::string OpcodeStr = IDVal.lower(); ParseInstructionInfo IInfo(Info.AsmRewrites); - bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr.str(), - IDLoc,Info.ParsedOperands); + bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, + IDLoc, Info.ParsedOperands); Info.ParseError = HadError; // Dump the parsed representation, if requested. @@ -1481,22 +1476,22 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { if (!HadError && getContext().getGenDwarfForAssembly() && getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) { - unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); + unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); - // If we previously parsed a cpp hash file line comment then make sure the - // current Dwarf File is for the CppHashFilename if not then emit the - // Dwarf File table for it and adjust the line number for the .loc. - const std::vector &MCDwarfFiles = - getContext().getMCDwarfFiles(); - if (CppHashFilename.size() != 0) { - if(MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != + // If we previously parsed a cpp hash file line comment then make sure the + // current Dwarf File is for the CppHashFilename if not then emit the + // Dwarf File table for it and adjust the line number for the .loc. 
+ const std::vector &MCDwarfFiles = + getContext().getMCDwarfFiles(); + if (CppHashFilename.size() != 0) { + if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != CppHashFilename) - getStreamer().EmitDwarfFileDirective( - getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename); + getStreamer().EmitDwarfFileDirective( + getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename); unsigned CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc,CppHashBuf); Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo); - } + } getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(), Line, 0, DWARF2_LINE_DEFAULT_IS_STMT ? -- cgit v1.1 From d3c965d6251e6d939f7797f8704d4e3a82f7e274 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Wed, 16 Jan 2013 21:29:55 +0000 Subject: Change CostTable model to be global to all targets Moving the X86CostTable to a common place, so that other back-ends can share the code. Also simplifying it a bit and commoning up tables with one and two types on operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172658 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/TargetTransformInfo.cpp | 41 ++++++++ lib/Target/X86/X86TargetTransformInfo.cpp | 168 ++++++++++++------------------ 2 files changed, 107 insertions(+), 102 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 3ef74eb..344be71 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -286,3 +286,44 @@ char NoTTI::ID = 0; ImmutablePass *llvm::createNoTargetTransformInfoPass() { return new NoTTI(); } + +//======================================= COST TABLES == + +CostTable::CostTable(const CostTableEntry *table, const size_t size, unsigned numTypes) + : table(table), size(size), numTypes(numTypes) { + assert(table && "missing cost table"); + assert(size > 0 && "empty cost table"); +} + +unsigned CostTable::_findCost(int ISD, MVT *Types) const { + for (unsigned i = 0; i < size; ++i) { + if (table[i].ISD == ISD) { + bool found = true; + for (unsigned t=0; tInstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::MUL, MVT::v4i64, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - }; + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + static const CostTableEntry AVX1CostTable[] = { + { ISD::MUL, { MVT::v8i32 }, 4 }, + { ISD::SUB, { MVT::v8i32 }, 4 }, + { ISD::ADD, { MVT::v8i32 }, 4 }, + { ISD::MUL, { MVT::v4i64 }, 4 }, + { ISD::SUB, { MVT::v4i64 }, 4 }, + { ISD::ADD, { MVT::v4i64 }, 4 }, + }; + UnaryCostTable costTable (AVX1CostTable, array_lengthof(AVX1CostTable)); // Look for AVX1 lowering tricks. if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, - LT.second); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; + unsigned cost = costTable.findCost(ISD, LT.second); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return LT.first * cost; } // Fallback to the default implementation. 
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); @@ -254,30 +216,29 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + static const CostTableEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, + { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, + { ISD::SIGN_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, + { ISD::ZERO_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, + { ISD::TRUNCATE, { MVT::v4i32, MVT::v4i64 }, 1 }, + { ISD::TRUNCATE, { MVT::v8i16, MVT::v8i32 }, 1 }, + { ISD::SINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, + { ISD::SINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, + { ISD::UINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, + { ISD::UINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, + { ISD::FP_TO_SINT, { MVT::v8i8, MVT::v8f32 }, 1 }, + { ISD::FP_TO_SINT, { MVT::v4i8, MVT::v4f32 }, 1 }, + { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i1 }, 6 }, + { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i1 }, 9 }, + { ISD::TRUNCATE, { MVT::v8i32, MVT::v8i64 }, 3 } }; + BinaryCostTable costTable (AVXConversionTbl, array_lengthof(AVXConversionTbl)); if (ST->hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, - array_lengthof(AVXConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; + unsigned cost = costTable.findCost(ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); @@ -293,48 +254,51 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry SSE42CostTbl[] = { - { ISD::SETCC, MVT::v2f64, 1 }, - { ISD::SETCC, MVT::v4f32, 1 }, - { ISD::SETCC, MVT::v2i64, 1 }, - { ISD::SETCC, MVT::v4i32, 1 }, - { ISD::SETCC, MVT::v8i16, 1 }, - { ISD::SETCC, MVT::v16i8, 1 }, + static const CostTableEntry SSE42CostTbl[] = { + { ISD::SETCC, { MVT::v2f64 }, 1 }, + { ISD::SETCC, { MVT::v4f32 }, 1 }, + { ISD::SETCC, { MVT::v2i64 }, 1 }, + { ISD::SETCC, { MVT::v4i32 }, 1 }, + { ISD::SETCC, { MVT::v8i16 }, 1 }, + { ISD::SETCC, { MVT::v16i8 }, 1 }, }; + UnaryCostTable costTableSSE4 (SSE42CostTbl, array_lengthof(SSE42CostTbl)); - static const X86CostTblEntry AVX1CostTbl[] = { - { ISD::SETCC, MVT::v4f64, 1 }, - { ISD::SETCC, MVT::v8f32, 1 }, + static const CostTableEntry AVX1CostTbl[] = { + { ISD::SETCC, { MVT::v4f64 }, 1 }, + { ISD::SETCC, { MVT::v8f32 }, 1 }, // AVX1 does not support 8-wide integer compare. 
- { ISD::SETCC, MVT::v4i64, 4 }, - { ISD::SETCC, MVT::v8i32, 4 }, - { ISD::SETCC, MVT::v16i16, 4 }, - { ISD::SETCC, MVT::v32i8, 4 }, + { ISD::SETCC, { MVT::v4i64 }, 4 }, + { ISD::SETCC, { MVT::v8i32 }, 4 }, + { ISD::SETCC, { MVT::v16i16 }, 4 }, + { ISD::SETCC, { MVT::v32i8 }, 4 }, }; + UnaryCostTable costTableAVX1 (AVX1CostTbl, array_lengthof(AVX1CostTbl)); - static const X86CostTblEntry AVX2CostTbl[] = { - { ISD::SETCC, MVT::v4i64, 1 }, - { ISD::SETCC, MVT::v8i32, 1 }, - { ISD::SETCC, MVT::v16i16, 1 }, - { ISD::SETCC, MVT::v32i8, 1 }, + static const CostTableEntry AVX2CostTbl[] = { + { ISD::SETCC, { MVT::v4i64 }, 1 }, + { ISD::SETCC, { MVT::v8i32 }, 1 }, + { ISD::SETCC, { MVT::v16i16 }, 1 }, + { ISD::SETCC, { MVT::v32i8 }, 1 }, }; + UnaryCostTable costTableAVX2 (AVX2CostTbl, array_lengthof(AVX2CostTbl)); if (ST->hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; + unsigned cost = costTableAVX2.findCost(ISD, MTy); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return LT.first * cost; } if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTbl[Idx].Cost; + unsigned cost = costTableAVX1.findCost(ISD, MTy); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return LT.first * cost; } if (ST->hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; + unsigned cost = costTableSSE4.findCost(ISD, MTy); + if (cost != BinaryCostTable::COST_NOT_FOUND) + return LT.first * cost; } return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); -- cgit v1.1 From 5db391c67d0922f4ab2ba57c07def19759c801a4 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Wed, 16 Jan 2013 21:38:56 +0000 Subject: [IR] Add 'Append' and 'AppendUnique' module flag behaviors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172659 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 46 +++++++++++++++++++++++++++++++++------------- lib/Linker/LinkModules.cpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 2488a7d..49821f2 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -571,24 +571,25 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap&SeenIDs, "invalid behavior operand in module flag (expected constant integer)", Op->getOperand(0)); unsigned BehaviorValue = Behavior->getZExtValue(); - Assert1((Module::Error <= BehaviorValue && - BehaviorValue <= Module::Override), - "invalid behavior operand in module flag (unexpected constant)", - Op->getOperand(0)); Assert1(ID, "invalid ID operand in module flag (expected metadata string)", Op->getOperand(1)); - // Unless this is a "requires" flag, check the ID is unique. - if (BehaviorValue != Module::Require) { - bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second; - Assert1(Inserted, - "module flag identifiers must be unique (or of 'require' type)", - ID); - } + // Sanity check the values for behaviors with additional requirements. + switch (BehaviorValue) { + default: + Assert1(false, + "invalid behavior operand in module flag (unexpected constant)", + Op->getOperand(0)); + break; - // If this is a "requires" flag, sanity check the value. 
-  if (BehaviorValue == Module::Require) {
     // The value should itself be an MDNode with two operands, a flag ID (an
     // MDString), and a value.
     MDNode *Value = dyn_cast(Op->getOperand(2));
@@ -603,6 +604,25 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap&SeenIDs,
     // Append it to the list of requirements, to check once all module flags are
     // scanned.
     Requirements.push_back(Value);
+    break;
+  }
+
+  case Module::Append:
+  case Module::AppendUnique: {
+    // These behavior types require the operand be an MDNode.
+    Assert1(isa(Op->getOperand(2)),
+            "invalid value for 'append'-type module flag "
+            "(expected a metadata node)", Op->getOperand(2));
+    break;
+  }
+  }
+
+  // Unless this is a "requires" flag, check the ID is unique.
+  if (BehaviorValue != Module::Require) {
+    bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
+    Assert1(Inserted,
+            "module flag identifiers must be unique (or of 'require' type)",
+            ID);
   }
 }
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 1b4ef32..3b8928a 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -1086,6 +1086,34 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
       }
       continue;
     }
+    case Module::Append: {
+      MDNode *DstValue = cast(DstOp->getOperand(2));
+      MDNode *SrcValue = cast(SrcOp->getOperand(2));
+      unsigned NumOps = DstValue->getNumOperands() + SrcValue->getNumOperands();
+      Value **VP, **Values = VP = new Value*[NumOps];
+      for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i, ++VP)
+        *VP = DstValue->getOperand(i);
+      for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i, ++VP)
+        *VP = SrcValue->getOperand(i);
+      DstOp->replaceOperandWith(2, MDNode::get(DstM->getContext(),
+                                               ArrayRef(Values,
+                                                        NumOps)));
+      delete[] Values;
+      break;
+    }
+    case Module::AppendUnique: {
+      SmallSetVector Elts;
+      MDNode *DstValue = cast(DstOp->getOperand(2));
+      MDNode *SrcValue = cast(SrcOp->getOperand(2));
+      for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i)
+        Elts.insert(DstValue->getOperand(i));
+      for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i)
+        Elts.insert(SrcValue->getOperand(i));
+      DstOp->replaceOperandWith(2, MDNode::get(DstM->getContext(),
+                                               ArrayRef(Elts.begin(),
+                                                        Elts.end())));
+      break;
+    }
     }
   }
--
cgit v1.1

From c147b678206db510336ee95c3b55dc9c0ff19595 Mon Sep 17 00:00:00 2001
From: Jack Carter
Date: Thu, 17 Jan 2013 00:28:20 +0000
Subject: This is a resubmittal. For some reason it broke the bots yesterday
 but I cannot reproduce the problem and have scrubbed my sources and even
 tested with llvm-lit -v --vg.

The Mips RDHWR (Read Hardware Register) instruction was not tested for
assembler or disassembler consumption. This patch adds that functionality.
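For example, the following input now round-trips through llvm-mc's assembler and disassembler (a sketch; hardware register 29, the user-local register commonly used for TLS, is the only one the disassembler currently accepts):

    # MIPS32 input to llvm-mc
    rdhwr   $5, $29    # read hardware register 29 into $5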
Contributor: Vladimir Medic

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172685 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp       |  8 +++++++-
 lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 16 ++++++++++++++++
 lib/Target/Mips/MipsRegisterInfo.td               |  2 +-
 3 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 57338df..39a53ae 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -1045,6 +1045,9 @@ MipsAsmParser::parseCPURegs(SmallVectorImpl &Operands) {
 
 MipsAsmParser::OperandMatchResultTy
 MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) {
+  if (isMips64())
+    return MatchOperand_NoMatch;
+
   // if the first token is not '$' we have error
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
@@ -1071,6 +1074,9 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) {
 
 MipsAsmParser::OperandMatchResultTy
 MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) {
+
+  if (!isMips64())
+    return MatchOperand_NoMatch;
   //if the first token is not '$' we have error
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
@@ -1088,7 +1094,7 @@ MipsAsmParser::parseHW64Regs(SmallVectorImpl &Operands) {
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
                                            Parser.getTok().getLoc());
-  op->setRegKind(MipsOperand::Kind_HWRegs);
+  op->setRegKind(MipsOperand::Kind_HW64Regs);
   Operands.push_back(op);
   Parser.Lex(); // Eat reg number
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 1efeffd..9560f3f 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -128,6 +128,11 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
                                               uint64_t Address,
                                               const void *Decoder);
 
+static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
+                                                unsigned Insn,
+                                                uint64_t Address,
+                                                const void *Decoder);
+
 static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
                                               unsigned RegNo,
                                               uint64_t Address,
@@ -454,6 +459,17 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
+                                                unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  //Currently only hardware register 29 is supported
+  if (RegNo != 29)
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
                                               unsigned RegNo,
                                               uint64_t Address,
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index c6eb0e1..f93dd86 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -373,6 +373,6 @@ def HWRegsOpnd : RegisterOperand {
   let ParserMatchClass = HWRegsAsmOperand;
 }
 
-def HW64RegsOpnd : RegisterOperand {
+def HW64RegsOpnd : RegisterOperand {
   let ParserMatchClass = HW64RegsAsmOperand;
 }
--
cgit v1.1

From 02c6325a4592fefebc837b677eaf87dc532ecb7c Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 17 Jan 2013 00:58:38 +0000
Subject: Provide a place for targets to insert ILP optimization passes.

Move the early if-conversion pass into this group.
ILP optimizations usually need to find the right balance between register
pressure and ILP using the MachineTraceMetrics analysis to identify critical
paths and estimate other costs. Such passes should run together so they can
share dominator tree and loop info analyses.

Besides if-conversion, future passes to run here could include expression
height reduction and ARM's MLxExpansion pass.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172687 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/Passes.cpp              | 10 ++++++----
 lib/Target/X86/X86TargetMachine.cpp | 16 ++++++++++------
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 2a135bc..02a1491 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -238,9 +238,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
   substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
   substitutePass(&PostRAMachineLICMID, &MachineLICMID);
 
-  // Disable early if-conversion. Targets that are ready can enable it.
-  disablePass(&EarlyIfConverterID);
-
   // Temporarily disable experimental passes.
   const TargetSubtargetInfo &ST = TM->getSubtarget();
   if (!ST.enableMachineScheduler())
@@ -551,7 +548,12 @@ void TargetPassConfig::addMachineSSAOptimization() {
   addPass(&DeadMachineInstructionElimID);
   printAndVerify("After codegen DCE pass");
 
-  addPass(&EarlyIfConverterID);
+  // Allow targets to insert passes that improve instruction level parallelism,
+  // like if-conversion. Such passes will typically need dominator trees and
+  // loop info, just like LICM and CSE below.
+  if (addILPOpts())
+    printAndVerify("After ILP optimizations");
+
   addPass(&MachineLICMID);
   addPass(&MachineCSEID);
   addPass(&MachineSinkingID);
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 706e64a..799f140 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -151,6 +151,7 @@ public:
   }
 
   virtual bool addInstSelector();
+  virtual bool addILPOpts();
   virtual bool addPreRegAlloc();
   virtual bool addPostRegAlloc();
   virtual bool addPreEmitPass();
@@ -158,12 +159,7 @@ public:
 } // namespace
 
 TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
-  X86PassConfig *PC = new X86PassConfig(this, PM);
-
-  if (X86EarlyIfConv && Subtarget.hasCMov())
-    PC->enablePass(&EarlyIfConverterID);
-
-  return PC;
+  return new X86PassConfig(this, PM);
 }
 
 bool X86PassConfig::addInstSelector() {
@@ -181,6 +177,14 @@ bool X86PassConfig::addInstSelector() {
   return false;
 }
 
+bool X86PassConfig::addILPOpts() {
+  if (X86EarlyIfConv && getX86Subtarget().hasCMov()) {
+    addPass(&EarlyIfConverterID);
+    return true;
+  }
+  return false;
+}
+
 bool X86PassConfig::addPreRegAlloc() {
   return false;  // -print-machineinstr shouldn't print after this.
 }
--
cgit v1.1

From 5ed625c3cff2511469e9b3c5131c29fd89ddd482 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 17 Jan 2013 01:06:04 +0000
Subject: Move MachineTraceMetrics.h into include/llvm/CodeGen. Let targets
 use it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/EarlyIfConversion.cpp | 4 +- lib/CodeGen/MachineTraceMetrics.cpp | 2 +- lib/CodeGen/MachineTraceMetrics.h | 350 ------------------------------------ lib/CodeGen/StackColoring.cpp | 1 - 4 files changed, 3 insertions(+), 354 deletions(-) delete mode 100644 lib/CodeGen/MachineTraceMetrics.h (limited to 'lib') diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index f332925..fac207e 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -17,8 +17,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "early-ifcvt" -#include "llvm/CodeGen/Passes.h" -#include "MachineTraceMetrics.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -31,6 +29,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 685ccab..f77a7b1 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "machine-trace-metrics" -#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h deleted file mode 100644 index 460730b..0000000 --- a/lib/CodeGen/MachineTraceMetrics.h +++ /dev/null @@ -1,350 +0,0 @@ -//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the MachineTraceMetrics analysis pass -// that estimates CPU resource usage and critical data dependency paths through -// preferred traces. This is useful for super-scalar CPUs where execution speed -// can be limited both by data dependencies and by limited execution resources. -// -// Out-of-order CPUs will often be executing instructions from multiple basic -// blocks at the same time. This makes it difficult to estimate the resource -// usage accurately in a single basic block. Resources can be estimated better -// by looking at a trace through the current basic block. -// -// For every block, the MachineTraceMetrics pass will pick a preferred trace -// that passes through the block. The trace is chosen based on loop structure, -// branch probabilities, and resource usage. The intention is to pick likely -// traces that would be the most affected by code transformations. -// -// It is expensive to compute a full arbitrary trace for every block, so to -// save some computations, traces are chosen to be convergent. This means that -// if the traces through basic blocks A and B ever cross when moving away from -// A and B, they never diverge again. 
This applies in both directions - If the -// traces meet above A and B, they won't diverge when going further back. -// -// Traces tend to align with loops. The trace through a block in an inner loop -// will begin at the loop entry block and end at a back edge. If there are -// nested loops, the trace may begin and end at those instead. -// -// For each trace, we compute the critical path length, which is the number of -// cycles required to execute the trace when execution is limited by data -// dependencies only. We also compute the resource height, which is the number -// of cycles required to execute all instructions in the trace when ignoring -// data dependencies. -// -// Every instruction in the current block has a slack - the number of cycles -// execution of the instruction can be delayed without extending the critical -// path. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H -#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/TargetSchedule.h" - -namespace llvm { - -class InstrItineraryData; -class MachineBasicBlock; -class MachineInstr; -class MachineLoop; -class MachineLoopInfo; -class MachineRegisterInfo; -class TargetInstrInfo; -class TargetRegisterInfo; -class raw_ostream; - -class MachineTraceMetrics : public MachineFunctionPass { - const MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - const MachineLoopInfo *Loops; - TargetSchedModel SchedModel; - -public: - class Ensemble; - class Trace; - static char ID; - MachineTraceMetrics(); - void getAnalysisUsage(AnalysisUsage&) const; - bool runOnMachineFunction(MachineFunction&); - void releaseMemory(); - void verifyAnalysis() const; - - friend class Ensemble; - friend class Trace; - - /// Per-basic block information that doesn't depend on the trace through the - /// block. - struct FixedBlockInfo { - /// The number of non-trivial instructions in the block. - /// Doesn't count PHI and COPY instructions that are likely to be removed. - unsigned InstrCount; - - /// True when the block contains calls. - bool HasCalls; - - FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} - - /// Returns true when resource information for this block has been computed. - bool hasResources() const { return InstrCount != ~0u; } - - /// Invalidate resource information. - void invalidate() { InstrCount = ~0u; } - }; - - /// Get the fixed resource information about MBB. Compute it on demand. - const FixedBlockInfo *getResources(const MachineBasicBlock*); - - /// A virtual register or regunit required by a basic block or its trace - /// successors. - struct LiveInReg { - /// The virtual register required, or a register unit. - unsigned Reg; - - /// For virtual registers: Minimum height of the defining instruction. - /// For regunits: Height of the highest user in the trace. - unsigned Height; - - LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} - }; - - /// Per-basic block information that relates to a specific trace through the - /// block. Convergent traces means that only one of these is required per - /// block in a trace ensemble. - struct TraceBlockInfo { - /// Trace predecessor, or NULL for the first block in the trace. - /// Valid when hasValidDepth(). 
-    const MachineBasicBlock *Pred;
-
-    /// Trace successor, or NULL for the last block in the trace.
-    /// Valid when hasValidHeight().
-    const MachineBasicBlock *Succ;
-
-    /// The block number of the head of the trace. (When hasValidDepth()).
-    unsigned Head;
-
-    /// The block number of the tail of the trace. (When hasValidHeight()).
-    unsigned Tail;
-
-    /// Accumulated number of instructions in the trace above this block.
-    /// Does not include instructions in this block.
-    unsigned InstrDepth;
-
-    /// Accumulated number of instructions in the trace below this block.
-    /// Includes instructions in this block.
-    unsigned InstrHeight;
-
-    TraceBlockInfo() :
-      Pred(0), Succ(0),
-      InstrDepth(~0u), InstrHeight(~0u),
-      HasValidInstrDepths(false), HasValidInstrHeights(false) {}
-
-    /// Returns true if the depth resources have been computed from the trace
-    /// above this block.
-    bool hasValidDepth() const { return InstrDepth != ~0u; }
-
-    /// Returns true if the height resources have been computed from the trace
-    /// below this block.
-    bool hasValidHeight() const { return InstrHeight != ~0u; }
-
-    /// Invalidate depth resources when some block above this one has changed.
-    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
-
-    /// Invalidate height resources when a block below this one has changed.
-    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
-
-    /// Determine if this block belongs to the same trace as TBI and comes
-    /// before it in the trace.
-    /// Also returns true when TBI == this.
-    bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const {
-      return hasValidDepth() && TBI.hasValidDepth() &&
-        Head == TBI.Head && InstrDepth <= TBI.InstrDepth;
-    }
-
-    // Data-dependency-related information. Per-instruction depth and height
-    // are computed from data dependencies in the current trace, using
-    // itinerary data.
-
-    /// Instruction depths have been computed. This implies hasValidDepth().
-    bool HasValidInstrDepths;
-
-    /// Instruction heights have been computed. This implies hasValidHeight().
-    bool HasValidInstrHeights;
-
-    /// Critical path length. This is the number of cycles in the longest data
-    /// dependency chain through the trace. This is only valid when both
-    /// HasValidInstrDepths and HasValidInstrHeights are set.
-    unsigned CriticalPath;
-
-    /// Live-in registers. These registers are defined above the current block
-    /// and used by this block or a block below it.
-    /// This does not include PHI uses in the current block, but it does
-    /// include PHI uses in deeper blocks.
-    SmallVector<LiveInReg, 4> LiveIns;
-
-    void print(raw_ostream&) const;
-  };
-
-  /// InstrCycles represents the cycle height and depth of an instruction in a
-  /// trace.
-  struct InstrCycles {
-    /// Earliest issue cycle as determined by data dependencies and instruction
-    /// latencies from the beginning of the trace. Data dependencies from
-    /// before the trace are not included.
-    unsigned Depth;
-
-    /// Minimum number of cycles from this instruction is issued to the end
-    /// of the trace, as determined by data dependencies and instruction
-    /// latencies.
-    unsigned Height;
-  };
-
-  /// A trace represents a plausible sequence of executed basic blocks that
-  /// passes through the current basic block once. The Trace class serves as a
-  /// handle to internal cached data structures.
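-  ///
-  /// A minimal usage sketch (illustrative only; MTM is assumed to be this
-  /// analysis, MBB the current block, and MI an instruction in it):
-  ///
-  ///   MachineTraceMetrics::Ensemble *E =
-  ///     MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
-  ///   MachineTraceMetrics::Trace T = E->getTrace(MBB);
-  ///   unsigned Slack = T.getInstrSlack(MI); // cycles MI can be delayed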
- class Trace { - Ensemble &TE; - TraceBlockInfo &TBI; - - unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; } - - public: - explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {} - void print(raw_ostream&) const; - - /// Compute the total number of instructions in the trace. - unsigned getInstrCount() const { - return TBI.InstrDepth + TBI.InstrHeight; - } - - /// Return the resource depth of the top/bottom of the trace center block. - /// This is the number of cycles required to execute all instructions from - /// the trace head to the trace center block. The resource depth only - /// considers execution resources, it ignores data dependencies. - /// When Bottom is set, instructions in the trace center block are included. - unsigned getResourceDepth(bool Bottom) const; - - /// Return the resource length of the trace. This is the number of cycles - /// required to execute the instructions in the trace if they were all - /// independent, exposing the maximum instruction-level parallelism. - /// - /// Any blocks in Extrablocks are included as if they were part of the - /// trace. - unsigned getResourceLength(ArrayRef Extrablocks = - ArrayRef()) const; - - /// Return the length of the (data dependency) critical path through the - /// trace. - unsigned getCriticalPath() const { return TBI.CriticalPath; } - - /// Return the depth and height of MI. The depth is only valid for - /// instructions in or above the trace center block. The height is only - /// valid for instructions in or below the trace center block. - InstrCycles getInstrCycles(const MachineInstr *MI) const { - return TE.Cycles.lookup(MI); - } - - /// Return the slack of MI. This is the number of cycles MI can be delayed - /// before the critical path becomes longer. - /// MI must be an instruction in the trace center block. - unsigned getInstrSlack(const MachineInstr *MI) const; - - /// Return the Depth of a PHI instruction in a trace center block successor. - /// The PHI does not have to be part of the trace. - unsigned getPHIDepth(const MachineInstr *PHI) const; - }; - - /// A trace ensemble is a collection of traces selected using the same - /// strategy, for example 'minimum resource height'. There is one trace for - /// every block in the function. - class Ensemble { - SmallVector BlockInfo; - DenseMap Cycles; - friend class Trace; - - void computeTrace(const MachineBasicBlock*); - void computeDepthResources(const MachineBasicBlock*); - void computeHeightResources(const MachineBasicBlock*); - unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); - void computeInstrDepths(const MachineBasicBlock*); - void computeInstrHeights(const MachineBasicBlock*); - void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, - ArrayRef Trace); - - protected: - MachineTraceMetrics &MTM; - virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; - virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; - explicit Ensemble(MachineTraceMetrics*); - const MachineLoop *getLoopFor(const MachineBasicBlock*) const; - const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; - const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; - - public: - virtual ~Ensemble(); - virtual const char *getName() const =0; - void print(raw_ostream&) const; - void invalidate(const MachineBasicBlock *MBB); - void verify() const; - - /// Get the trace that passes through MBB. - /// The trace is computed on demand. 
- Trace getTrace(const MachineBasicBlock *MBB); - }; - - /// Strategies for selecting traces. - enum Strategy { - /// Select the trace through a block that has the fewest instructions. - TS_MinInstrCount, - - TS_NumStrategies - }; - - /// Get the trace ensemble representing the given trace selection strategy. - /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, - /// and valid for the lifetime of the analysis pass. - Ensemble *getEnsemble(Strategy); - - /// Invalidate cached information about MBB. This must be called *before* MBB - /// is erased, or the CFG is otherwise changed. - /// - /// This invalidates per-block information about resource usage for MBB only, - /// and it invalidates per-trace information for any trace that passes - /// through MBB. - /// - /// Call Ensemble::getTrace() again to update any trace handles. - void invalidate(const MachineBasicBlock *MBB); - -private: - // One entry per basic block, indexed by block number. - SmallVector BlockInfo; - - // One ensemble per strategy. - Ensemble* Ensembles[TS_NumStrategies]; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, - const MachineTraceMetrics::Trace &Tr) { - Tr.print(OS); - return OS; -} - -inline raw_ostream &operator<<(raw_ostream &OS, - const MachineTraceMetrics::Ensemble &En) { - En.print(OS); - return OS; -} -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 42502eb..c173293 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -23,7 +23,6 @@ #define DEBUG_TYPE "stackcoloring" #include "llvm/CodeGen/Passes.h" -#include "MachineTraceMetrics.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" -- cgit v1.1 From d96c72a14cf327af98cbc7d3491f7a2248f5137d Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 17 Jan 2013 02:59:59 +0000 Subject: Add the DW_AT_GNU_addr_base for the skeleton cu. Add support for emitting the dwarf32 version of DW_FORM_sec_offset and correct disassembler support. 
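For reference, a DW_FORM_sec_offset value is a 4-byte offset into another
debug section in the DWARF32 format; DWARF64 widens it to 8 bytes. A minimal
sizing sketch (illustrative only; the IsDWARF64 flag is an assumed parameter,
and the emitter below hard-codes the DWARF32 case, hence the FIXMEs):

    // Hypothetical helper, not part of this patch.
    static unsigned getSecOffsetSize(bool IsDWARF64) {
      return IsDWARF64 ? 8 : 4;
    }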
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172698 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/AsmPrinter/DIE.cpp        | 2 ++
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 +++++-
 lib/DebugInfo/DWARFFormValue.cpp      | 6 ++----
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 0a659c2..5d1fd83 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -193,6 +193,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   case dwarf::DW_FORM_data1: Size = 1; break;
   case dwarf::DW_FORM_ref2:  // Fall thru
   case dwarf::DW_FORM_data2: Size = 2; break;
+  case dwarf::DW_FORM_sec_offset: // Fall thru
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: Size = 4; break;
   case dwarf::DW_FORM_ref8:  // Fall thru
@@ -218,6 +219,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
   case dwarf::DW_FORM_data1: return sizeof(int8_t);
   case dwarf::DW_FORM_ref2:  // Fall thru
   case dwarf::DW_FORM_data2: return sizeof(int16_t);
+  case dwarf::DW_FORM_sec_offset: // Fall thru
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: return sizeof(int32_t);
   case dwarf::DW_FORM_ref8:  // Fall thru
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1813132..077aab3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2443,7 +2443,11 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
   // FIXME: This should be the .dwo file.
   NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN);

-  // FIXME: We also need DW_AT_addr_base and DW_AT_dwo_id.
+  // FIXME: We also need DW_AT_dwo_id.
+
+  // FIXME: The addr base should be relative for each compile unit, however,
+  // this one is going to be 0 anyhow.
+  NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);

   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
   // into an entity. We're using 0, or a NULL label for this.
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index d1bcf96..ea59c1d 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -173,10 +173,8 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
     indirect = true;
     break;
   case DW_FORM_sec_offset:
-    if (cu->getAddressByteSize() == 4)
-      Value.uval = data.getU32(offset_ptr);
-    else
-      Value.uval = data.getU64(offset_ptr);
+    // FIXME: This is 64-bit for DWARF64.
+    Value.uval = data.getU32(offset_ptr);
     break;
   case DW_FORM_flag_present:
     Value.uval = 1;
-- cgit v1.1

From 446b88fb81e14f5fa6f98ca924c83edc856cade7 Mon Sep 17 00:00:00 2001
From: Eric Christopher
Date: Thu, 17 Jan 2013 03:00:04 +0000
Subject: Fix the assembly and disassembly of DW_FORM_sec_offset.

Found this by changing both the string of the dwo_name to be correct and the
type of the statement list. Testcases all around.
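Concretely, DW_AT_stmt_list on the skeleton CU now uses DW_FORM_sec_offset
rather than DW_FORM_data4, and DW_AT_GNU_dwo_name is derived from the source
file name. The derivation mirrors the sys::path calls in the patch; with an
assumed input of "foo.c":

    SmallString<16> T("foo.c");
    sys::path::replace_extension(T, ".dwo");
    StringRef FN = sys::path::filename(T); // FN == "foo.dwo"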
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DIE.cpp | 1 + lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 ++++++---- lib/DebugInfo/DWARFDebugInfoEntry.cpp | 12 ++++-------- lib/DebugInfo/DWARFFormValue.cpp | 15 +++++---------- 4 files changed, 16 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 5d1fd83..4ded281 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -254,6 +254,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; return AP->getDataLayout().getPointerSize(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 077aab3..de0b2b0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2433,14 +2433,16 @@ void DwarfDebug::emitDebugInlineInfo() { // DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa. CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { DICompileUnit DIUnit(N); - StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, DIUnit.getLanguage(), Die, Asm, this, &SkeletonHolder); - // FIXME: This should be the .dwo file. + + SmallString<16> T(DIUnit.getFilename()); + sys::path::replace_extension(T, ".dwo"); + StringRef FN = sys::path::filename(T); NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN); // FIXME: We also need DW_AT_dwo_id. @@ -2456,10 +2458,10 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, Asm->GetTempSymbol("section_line")); else - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index bb11850..02b15d6 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -203,11 +203,9 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form = debug_info_data.getULEB128(&offset); break; + // FIXME: 64-bit for DWARF64 case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - debug_info_data.getU32(offset_ptr); - else - debug_info_data.getU64(offset_ptr); + debug_info_data.getU32(offset_ptr); break; default: @@ -344,11 +342,9 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_is_indirect = true; break; + // FIXME: 64-bit for DWARF64. 
case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - debug_info_data.getU32(offset_ptr); - else - debug_info_data.getU64(offset_ptr); + debug_info_data.getU32(offset_ptr); break; default: diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index ea59c1d..9f807aa 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -72,7 +72,7 @@ static const uint8_t form_sizes_addr8[] = { 8, // 0x14 DW_FORM_ref8 0, // 0x15 DW_FORM_ref_udata 0, // 0x16 DW_FORM_indirect - 8, // 0x17 DW_FORM_sec_offset + 4, // 0x17 DW_FORM_sec_offset 0, // 0x18 DW_FORM_exprloc 0, // 0x19 DW_FORM_flag_present 8, // 0x20 DW_FORM_ref_sig8 @@ -299,12 +299,9 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, form = debug_info_data.getULEB128(offset_ptr); break; - // 4 for DWARF32, 8 for DWARF64. + // FIXME: 4 for DWARF32, 8 for DWARF64. case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - *offset_ptr += 4; - else - *offset_ptr += 8; + *offset_ptr += 4; return true; default: @@ -427,11 +424,9 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { OS << "DW_FORM_indirect"; break; + // Should be formatted to 64-bit for DWARF64. case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - OS << format("0x%08x", (uint32_t)uvalue); - else - OS << format("0x%016" PRIx64, uvalue); + OS << format("0x%08x", (uint32_t)uvalue); break; default: -- cgit v1.1 From c2cbcc3acfc0e7426626331d57b35f1d6c7a4a47 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 17 Jan 2013 06:59:42 +0000 Subject: Combine AVX and SSE forms of MOVSS and MOVSD into the same multiclasses so they get instantiated together. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172704 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 124 +++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 74 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 89149c6..18c3dfe 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -436,93 +436,69 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // in terms of a copy, and just mentioned, we don't use movss/movsd for copies. //===----------------------------------------------------------------------===// -class sse12_move_rr : - SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, - [(set VR128:$dst, (vt (OpNode VR128:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; +multiclass sse12_move_rr { + def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [(set VR128:$dst, (vt (OpNode VR128:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>; -// Loading from memory automatically zeroing upper bits. 
-class sse12_move_rm : - SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; - -// AVX -def VMOVSSrr : sse12_move_rr, XS, VEX_4V, - VEX_LIG; -def VMOVSDrr : sse12_move_rr, XD, VEX_4V, - VEX_LIG; - -// For the disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { - def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, VEX_4V, VEX_LIG; - def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, VEX_4V, VEX_LIG; + // For the disassembler + let isCodeGenOnly = 1, hasSideEffects = 0 in + def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [], IIC_SSE_MOV_S_RR>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm, XS, VEX, - VEX_LIG; - let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm, XD, VEX, - VEX_LIG; -} +multiclass sse12_move { + // AVX + defm V#NAME : sse12_move_rr, + VEX_4V, VEX_LIG; -def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XS, VEX, VEX_LIG; -def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XD, VEX, VEX_LIG; + def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + VEX, VEX_LIG; + // SSE1 & 2 + let Constraints = "$src1 = $dst" in { + defm NAME : sse12_move_rr; + } -// SSE1 & 2 -let Constraints = "$src1 = $dst" in { - def MOVSSrr : sse12_move_rr, XS; - def MOVSDrr : sse12_move_rr, XD; + def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; +} - // For the disassembler - let isCodeGenOnly = 1, hasSideEffects = 0 in { - def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XS; - def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XD; - } +// Loading from memory automatically zeroing upper bits. 
+multiclass sse12_move_rm { + def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>; } +defm MOVSS : sse12_move, XS; +defm MOVSD : sse12_move, XD; + let canFoldAsLoad = 1, isReMaterializable = 1 in { - def MOVSSrm : sse12_move_rm, XS; + defm MOVSS : sse12_move_rm, XS; let AddedComplexity = 20 in - def MOVSDrm : sse12_move_rm, XD; + defm MOVSD : sse12_move_rm, XD; } -def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; -def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; - // Patterns let Predicates = [HasAVX] in { let AddedComplexity = 15 in { -- cgit v1.1 From 6c327f92a562d9d280bdbc3bde3c0ce269a4c65c Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 17 Jan 2013 09:59:53 +0000 Subject: Optimization for the following SIGN_EXTEND pairs: v8i8 -> v8i64, v8i8 -> v8i32, v4i8 -> v4i64, v4i16 -> v4i64 for AVX and AVX2. Bug 14865. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172708 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++++++++++----- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +--- lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a82410a..3e5a446 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4298,11 +4298,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (isa(N0)) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); - // fold (sext (sext x)) -> (sext x) - // fold (sext (aext x)) -> (sext x) - if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, - N0.getOperand(0)); + // Folding (sext (sext x)) is obvious, but we do it only after the type + // legalization phase. When the sequence is like {(T1->T2), (T2->T3)} and + // T1 or T3 (or the both) are illegal types, the TypeLegalizer may not + // give a good sequence for the (T1->T3) pair. + // So we give a chance to target specific combiner to optimize T1->T2 and T2->T3 + // separately and may be fold them in a preceding of subsequent instruction. 
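+  // For example (illustrative only): on AVX, a sext from v8i8 to v8i32 is
+  // best done in two steps, v8i8 -> v8i16 -> v8i32 (vpmovsxbw followed by
+  // vpmovsxwd), and the target combiner can only form that pair while both
+  // extends are still visible here.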
+ if (Level >= AfterLegalizeTypes) { + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + } if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 344d144..91491bf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2554,9 +2554,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) - return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::UNDEF) + if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. return getConstant(0, VT); break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f42884d..a8294b6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16970,14 +16970,37 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + + if (!VT.isVector()) + return SDValue(); + + SDValue In = N->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = N->getDebugLoc(); + unsigned ExtenedEltSize = VT.getVectorElementType().getSizeInBits(); + + // Split SIGN_EXTEND operation to use vmovsx instruction when possible + if (InVT == MVT::v8i8) { + if (ExtenedEltSize > 16 && !Subtarget->hasInt256()) + In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In); + if (ExtenedEltSize > 32) + In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In); + return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); + } + + if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) && + ExtenedEltSize > 32 && !Subtarget->hasInt256()) { + In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); + return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); + } if (!DCI.isBeforeLegalizeOps()) return SDValue(); if (!Subtarget->hasFp256()) return SDValue(); - EVT VT = N->getValueType(0); - if (VT.isVector() && VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; -- cgit v1.1 From 11af9a873f9e1409a422ab31e22729368805afaf Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 17 Jan 2013 11:12:32 +0000 Subject: ASan: add optional 'zero-based shadow' option to ASan passes. 
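For context, ASan computes the shadow address of every checked memory
access as

    Shadow = (Addr >> Scale) + Offset

where Scale defaults to 3 and Offset to 1<<29 (32-bit) or 1<<44 (64-bit);
with zero-based shadow, as already used on Android, Offset is 0 and the add
disappears from every instrumented access. A sketch of the IR the pass emits
for this computation (illustrative, close to but not verbatim from the
pass's memToShadow helper):

    Value *Shadow = IRB.CreateLShr(AddrLong, Mapping.Scale);
    if (Mapping.Offset != 0)
      Shadow = IRB.CreateAdd(Shadow,
                             ConstantInt::get(IntptrTy, Mapping.Offset));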
Always tell the values of shadow scale and offset to the runtime git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172709 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 74 ++++++++++++---------- 1 file changed, 39 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 25ca165..e733500 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -53,7 +53,6 @@ using namespace llvm; static const uint64_t kDefaultShadowScale = 3; static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; -static const uint64_t kDefaultShadowOffsetPie = 0; static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; @@ -193,13 +192,14 @@ struct ShadowMapping { uint64_t Offset; }; -static ShadowMapping getShadowMapping(const Module &M, int LongSize) { - llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; +static ShadowMapping getShadowMapping(const Module &M, int LongSize, + bool ZeroBaseShadow) { + llvm::Triple TargetTriple(M.getTargetTriple()); + bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android; ShadowMapping Mapping; - Mapping.Offset = isAndroid ? kDefaultShadowOffsetPie : + Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 : (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); if (ClMappingOffsetLog >= 0) { // Zero offset log is the special case. @@ -225,13 +225,15 @@ struct AddressSanitizer : public FunctionPass { AddressSanitizer(bool CheckInitOrder = false, bool CheckUseAfterReturn = false, bool CheckLifetime = false, - StringRef BlacklistFile = StringRef()) + StringRef BlacklistFile = StringRef(), + bool ZeroBaseShadow = false) : FunctionPass(ID), CheckInitOrder(CheckInitOrder || ClInitializers), CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn), CheckLifetime(CheckLifetime || ClCheckLifetime), BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile - : BlacklistFile) {} + : BlacklistFile), + ZeroBaseShadow(ZeroBaseShadow) {} virtual const char *getPassName() const { return "AddressSanitizerFunctionPass"; } @@ -265,6 +267,9 @@ struct AddressSanitizer : public FunctionPass { bool CheckInitOrder; bool CheckUseAfterReturn; bool CheckLifetime; + SmallString<64> BlacklistFile; + bool ZeroBaseShadow; + LLVMContext *C; DataLayout *TD; int LongSize; @@ -273,7 +278,6 @@ struct AddressSanitizer : public FunctionPass { Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; - SmallString<64> BlacklistFile; OwningPtr BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; @@ -286,11 +290,13 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: AddressSanitizerModule(bool CheckInitOrder = false, - StringRef BlacklistFile = StringRef()) + StringRef BlacklistFile = StringRef(), + bool ZeroBaseShadow = false) : ModulePass(ID), CheckInitOrder(CheckInitOrder || ClInitializers), BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile - : BlacklistFile) {} + : BlacklistFile), + ZeroBaseShadow(ZeroBaseShadow) {} bool runOnModule(Module &M); static char ID; // Pass identification, replacement for typeid virtual const char *getPassName() const { @@ -309,6 +315,8 @@ class AddressSanitizerModule : public ModulePass { bool CheckInitOrder; SmallString<64> BlacklistFile; + bool ZeroBaseShadow; + OwningPtr BL; SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; Type *IntptrTy; @@ -473,9 +481,9 @@ INITIALIZE_PASS(AddressSanitizer, "asan", false, false) FunctionPass *llvm::createAddressSanitizerFunctionPass( bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime, - StringRef BlacklistFile) { + StringRef BlacklistFile, bool ZeroBaseShadow) { return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn, - CheckLifetime, BlacklistFile); + CheckLifetime, BlacklistFile, ZeroBaseShadow); } char AddressSanitizerModule::ID = 0; @@ -483,8 +491,9 @@ INITIALIZE_PASS(AddressSanitizerModule, "asan-module", "AddressSanitizer: detects use-after-free and out-of-bounds bugs." "ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass( - bool CheckInitOrder, StringRef BlacklistFile) { - return new AddressSanitizerModule(CheckInitOrder, BlacklistFile); + bool CheckInitOrder, StringRef BlacklistFile, bool ZeroBaseShadow) { + return new AddressSanitizerModule(CheckInitOrder, BlacklistFile, + ZeroBaseShadow); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -818,7 +827,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { C = &(M.getContext()); int LongSize = TD->getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); - Mapping = getShadowMapping(M, LongSize); + Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow); initializeCallbacks(M); DynamicallyInitializedGlobals.Init(M); @@ -967,25 +976,20 @@ void AddressSanitizer::initializeCallbacks(Module &M) { } void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const { - // Tell the values of mapping offset and scale to the run-time if they are - // specified by command-line flags. - if (ClMappingOffsetLog >= 0) { - GlobalValue *asan_mapping_offset = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, Mapping.Offset), - kAsanMappingOffsetName); - // Read the global, otherwise it may be optimized away. - IRB.CreateLoad(asan_mapping_offset, true); - } - - if (ClMappingScale) { - GlobalValue *asan_mapping_scale = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, Mapping.Scale), - kAsanMappingScaleName); - // Read the global, otherwise it may be optimized away. - IRB.CreateLoad(asan_mapping_scale, true); - } + // Tell the values of mapping offset and scale to the run-time. + GlobalValue *asan_mapping_offset = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, Mapping.Offset), + kAsanMappingOffsetName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_offset, true); + + GlobalValue *asan_mapping_scale = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, Mapping.Scale), + kAsanMappingScaleName); + // Read the global, otherwise it may be optimized away. 
+ IRB.CreateLoad(asan_mapping_scale, true); } // virtual @@ -1013,7 +1017,7 @@ bool AddressSanitizer::doInitialization(Module &M) { AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); - Mapping = getShadowMapping(M, LongSize); + Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow); emitShadowMapping(M, IRB); appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); -- cgit v1.1 From 792b1233384da442e6c75cf580bd1927123a56f3 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 17 Jan 2013 17:45:19 +0000 Subject: This patch fixes the PPC calling convention to handle returns of _Complex float and _Complex long double, by simply increasing the number of floating point registers available for return values. The test case verifies that the correct registers are loaded. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172733 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCallingConv.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 3f87e88..120e049 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -28,8 +28,8 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, - CCIfType<[f32], CCAssignToReg<[F1]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, // Vector types are always returned in V2. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> -- cgit v1.1 From 88ceb186f156136592c71021585e847c073612cc Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 17 Jan 2013 18:32:34 +0000 Subject: [ObjCARC] Implemented operator<< for InstructionClass and changed a ``Visited'' Debug message to use it. 
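With this change a -debug run prints the computed classification next to
each visited instruction. An illustrative line of output (the exact IR
printed will vary):

    ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: IC_Retain; %2 = tail call i8* @objc_retain(i8* %1)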
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172735 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 59 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 8da8726..a54bd2b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -167,6 +167,59 @@ namespace { IC_User, ///< could "use" a pointer IC_None ///< anything else }; + + raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) + LLVM_ATTRIBUTE_USED; + raw_ostream &operator<<(raw_ostream &OS, InstructionClass Class) { + switch (Class) { + case IC_Retain: + return OS << "IC_Retain"; + case IC_RetainRV: + return OS << "IC_RetainRV"; + case IC_RetainBlock: + return OS << "IC_RetainBlock"; + case IC_Release: + return OS << "IC_Release"; + case IC_Autorelease: + return OS << "IC_Autorelease"; + case IC_AutoreleaseRV: + return OS << "IC_AutoreleaseRV"; + case IC_AutoreleasepoolPush: + return OS << "IC_AutoreleasepoolPush"; + case IC_AutoreleasepoolPop: + return OS << "IC_AutoreleasepoolPop"; + case IC_NoopCast: + return OS << "IC_NoopCast"; + case IC_FusedRetainAutorelease: + return OS << "IC_FusedRetainAutorelease"; + case IC_FusedRetainAutoreleaseRV: + return OS << "IC_FusedRetainAutoreleaseRV"; + case IC_LoadWeakRetained: + return OS << "IC_LoadWeakRetained"; + case IC_StoreWeak: + return OS << "IC_StoreWeak"; + case IC_InitWeak: + return OS << "IC_InitWeak"; + case IC_LoadWeak: + return OS << "IC_LoadWeak"; + case IC_MoveWeak: + return OS << "IC_MoveWeak"; + case IC_CopyWeak: + return OS << "IC_CopyWeak"; + case IC_DestroyWeak: + return OS << "IC_DestroyWeak"; + case IC_StoreStrong: + return OS << "IC_StoreStrong"; + case IC_CallOrUser: + return OS << "IC_CallOrUser"; + case IC_Call: + return OS << "IC_Call"; + case IC_User: + return OS << "IC_User"; + case IC_None: + return OS << "IC_None"; + } + } } /// \brief Test whether the given value is possible a reference-counted pointer. @@ -2387,11 +2440,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { Instruction *Inst = &*I++; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: " << - *Inst << "\n"); - InstructionClass Class = GetBasicInstructionClass(Inst); + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: " << Class + << "; " << *Inst << "\n"); + switch (Class) { default: break; -- cgit v1.1 From 222f02f7eb70b831881f7b3521ffba9f4d408b94 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 17 Jan 2013 18:36:17 +0000 Subject: Added missing const from my last commit. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172736 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index a54bd2b..8dae235 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -170,7 +170,7 @@ namespace { raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) LLVM_ATTRIBUTE_USED; - raw_ostream &operator<<(raw_ostream &OS, InstructionClass Class) { + raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) { switch (Class) { case IC_Retain: return OS << "IC_Retain"; -- cgit v1.1 From a454ffd02a7ce2f7c8ffebaad1055bf09637d21b Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Thu, 17 Jan 2013 18:42:37 +0000 Subject: Add indexed load/store instructions for offset validation check. This patch fixes bug 14902 - http://llvm.org/bugs/show_bug.cgi?id=14902 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172737 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3b1ae09..ab35982 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2352,7 +2352,9 @@ isValidOffset(const int Opcode, const int Offset) const { switch(Opcode) { case Hexagon::LDriw: + case Hexagon::LDriw_indexed: case Hexagon::LDriw_f: + case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: assert((Offset % 4 == 0) && "Offset has incorrect alignment"); @@ -2360,8 +2362,10 @@ isValidOffset(const int Opcode, const int Offset) const { (Offset <= Hexagon_MEMW_OFFSET_MAX); case Hexagon::LDrid: + case Hexagon::LDrid_indexed: case Hexagon::LDrid_f: case Hexagon::STrid: + case Hexagon::STrid_indexed: case Hexagon::STrid_f: assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && -- cgit v1.1 From 505bca3617fe310a5ff07914e3cf3ea6ae4d27ed Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 17 Jan 2013 19:21:48 +0000 Subject: [ms-inline asm] Add support for the 'SIZE' and 'LENGTH' operators. Part of rdar://12576868 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 4 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 82 +++++++++++++++++-------------- 2 files changed, 48 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index f9ed63a..0aca2fa 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3956,10 +3956,10 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, } // Expr/Input or Output. 
- unsigned Size; bool IsVarDecl; + unsigned Length, Size, Type; void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, - Size, IsVarDecl); + Length, Size, Type, IsVarDecl); if (OpDecl) { bool isOutput = (i == 1) && Desc.mayStore(); if (Operand->isMem() && Operand->needSizeDirective()) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 05bb1e3..38665f0 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -57,7 +57,7 @@ private: X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); - X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); + X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); @@ -1043,11 +1043,11 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - unsigned tSize; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tSize, - IsVarDecl); + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); if (!Size) - Size = tSize; + Size = tType * 8; // Size is in terms of bits in this context. NeedSizeDir = Size > 0; } } @@ -1148,10 +1148,19 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { OffsetOfLoc); } -/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or -/// C++ type or variable. If the variable is an array, TYPE returns the size of -/// a single element of the array. -X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { +enum IntelOperatorKind { + IOK_LENGTH, + IOK_SIZE, + IOK_TYPE +}; + +/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator +/// returns the number of elements in an array. It returns the value 1 for +/// non-array variables. The SIZE operator returns the size of a C or C++ +/// variable. A variable's size is the product of its LENGTH and TYPE. The +/// TYPE operator returns the size of a C or C++ type or variable. If the +/// variable is an array, TYPE returns the size of a single element. +X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { SMLoc TypeLoc = Start; Parser.Lex(); // Eat offset. Start = Parser.getTok().getLoc(); @@ -1162,50 +1171,51 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { if (getParser().ParseExpression(Val, End)) return 0; - unsigned Size = 0; + unsigned Length = 0, Size = 0, Type = 0; if (const MCSymbolRefExpr *SymRef = dyn_cast(Val)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. bool IsVarDecl; - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size, - IsVarDecl)) + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, + Size, Type, IsVarDecl)) return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); - - Size /= 8; // Size is in terms of bits, but we want bytes in the context. 
+ } + unsigned CVal; + switch(OpKind) { + default: llvm_unreachable("Unexpected operand kind!"); + case IOK_LENGTH: CVal = Length; break; + case IOK_SIZE: CVal = Size; break; + case IOK_TYPE: CVal = Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an // immediate. E.g. TYPE foo -> $$4 unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size)); + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); - const MCExpr *Imm = MCConstantExpr::Create(Size, getContext()); + const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); } X86Operand *X86AsmParser::ParseIntelOperand() { SMLoc Start = Parser.getTok().getLoc(), End; - - // offset operator. StringRef AsmTokStr = Parser.getTok().getString(); - if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") && - isParsingInlineAsm()) - return ParseIntelOffsetOfOperator(Start); - - // Type directive. - if ((AsmTokStr == "type" || AsmTokStr == "TYPE") && - isParsingInlineAsm()) - return ParseIntelTypeOperator(Start); - - // Unsupported directives. - if (isParsingIntelSyntax() && - (AsmTokStr == "size" || AsmTokStr == "SIZE" || - AsmTokStr == "length" || AsmTokStr == "LENGTH")) - return ErrorOperand(Start, "Unsupported directive!"); - - // immediate. + + // Offset, length, type and size operators. + if (isParsingInlineAsm()) { + if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") + return ParseIntelOffsetOfOperator(Start); + if (AsmTokStr == "length" || AsmTokStr == "LENGTH") + return ParseIntelOperator(Start, IOK_LENGTH); + if (AsmTokStr == "size" || AsmTokStr == "SIZE") + return ParseIntelOperator(Start, IOK_SIZE); + if (AsmTokStr == "type" || AsmTokStr == "TYPE") + return ParseIntelOperator(Start, IOK_TYPE); + } + + // Immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; @@ -1214,7 +1224,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { } } - // register + // Register. unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start @@ -1226,7 +1236,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return ParseIntelMemOperand(RegNo, Start); } - // mem operand + // Memory operand. return ParseIntelMemOperand(0, Start); } -- cgit v1.1 From 8f4ee4b2a2d2aa682643ee16da86195c804686a6 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 17 Jan 2013 19:34:57 +0000 Subject: This patch fixes PR13626 by providing i128 support in the return calling convention. 128-bit integers are now properly returned in GPR3 and GPR4 on PowerPC. 
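For illustration (an assumed example, not the patch's testcase):

    __int128 f(__int128 x) { return x + 1; }

compiled for powerpc64 now returns its value split across the 64-bit GPR
pair r3:r4, matching the new i128 entry in RetCC_PPC, where previously it
could not be lowered.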
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172745 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCallingConv.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 120e049..1633580 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -27,6 +27,7 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[f32], CCAssignToReg<[F1, F2]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, -- cgit v1.1 From 634bd8512a6576f9426713b72ce98904d159fd8a Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 17 Jan 2013 19:52:25 +0000 Subject: [Linker] Drop support for IR-level extended linking support (archives, etc.). - This code is dead, and the "right" way to get this support is to use the platform-specific linker-integrated LTO mechanisms, or the forthcoming LLVM linker. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172749 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/CMakeLists.txt | 2 - lib/Linker/LinkArchives.cpp | 197 ---------------------------------------- lib/Linker/LinkItems.cpp | 216 -------------------------------------------- lib/Linker/Linker.cpp | 67 -------------- 4 files changed, 482 deletions(-) delete mode 100644 lib/Linker/LinkArchives.cpp delete mode 100644 lib/Linker/LinkItems.cpp (limited to 'lib') diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt index 0b6d2f4..28f1262 100644 --- a/lib/Linker/CMakeLists.txt +++ b/lib/Linker/CMakeLists.txt @@ -1,6 +1,4 @@ add_llvm_library(LLVMLinker - LinkArchives.cpp - LinkItems.cpp LinkModules.cpp Linker.cpp ) diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp deleted file mode 100644 index a35991c..0000000 --- a/lib/Linker/LinkArchives.cpp +++ /dev/null @@ -1,197 +0,0 @@ -//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains routines to handle linking together LLVM bitcode files, -// and to handle annoying things like static libraries. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Linker.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/Bitcode/Archive.h" -#include "llvm/IR/Module.h" -#include -#include -using namespace llvm; - -/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still -/// exist in an LLVM module. This is a bit tricky because there may be two -/// symbols with the same name but different LLVM types that will be resolved to -/// each other but aren't currently (thus we need to treat it as resolved). -/// -/// Inputs: -/// M - The module in which to find undefined symbols. -/// -/// Outputs: -/// UndefinedSymbols - A set of C++ strings containing the name of all -/// undefined symbols. -/// -static void -GetAllUndefinedSymbols(Module *M, std::set &UndefinedSymbols) { - std::set DefinedSymbols; - UndefinedSymbols.clear(); - - // If the program doesn't define a main, try pulling one in from a .a file. 
- // This is needed for programs where the main function is defined in an - // archive, such f2c'd programs. - Function *Main = M->getFunction("main"); - if (Main == 0 || Main->isDeclaration()) - UndefinedSymbols.insert("main"); - - for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) - if (I->hasName()) { - if (I->isDeclaration()) - UndefinedSymbols.insert(I->getName()); - else if (!I->hasLocalLinkage()) { - assert(!I->hasDLLImportLinkage() - && "Found dllimported non-external symbol!"); - DefinedSymbols.insert(I->getName()); - } - } - - for (Module::global_iterator I = M->global_begin(), E = M->global_end(); - I != E; ++I) - if (I->hasName()) { - if (I->isDeclaration()) - UndefinedSymbols.insert(I->getName()); - else if (!I->hasLocalLinkage()) { - assert(!I->hasDLLImportLinkage() - && "Found dllimported non-external symbol!"); - DefinedSymbols.insert(I->getName()); - } - } - - for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); - I != E; ++I) - if (I->hasName()) - DefinedSymbols.insert(I->getName()); - - // Prune out any defined symbols from the undefined symbols set... - for (std::set::iterator I = UndefinedSymbols.begin(); - I != UndefinedSymbols.end(); ) - if (DefinedSymbols.count(*I)) - UndefinedSymbols.erase(I++); // This symbol really is defined! - else - ++I; // Keep this symbol in the undefined symbols list -} - -/// LinkInArchive - opens an archive library and link in all objects which -/// provide symbols that are currently undefined. -/// -/// Inputs: -/// Filename - The pathname of the archive. -/// -/// Return Value: -/// TRUE - An error occurred. -/// FALSE - No errors. -bool -Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { - // Make sure this is an archive file we're dealing with - if (!Filename.isArchive()) - return error("File '" + Filename.str() + "' is not an archive."); - - // Open the archive file - verbose("Linking archive file '" + Filename.str() + "'"); - - // Find all of the symbols currently undefined in the bitcode program. - // If all the symbols are defined, the program is complete, and there is - // no reason to link in any archive files. - std::set UndefinedSymbols; - GetAllUndefinedSymbols(Composite, UndefinedSymbols); - - if (UndefinedSymbols.empty()) { - verbose("No symbols undefined, skipping library '" + Filename.str() + "'"); - return false; // No need to link anything in! - } - - std::string ErrMsg; - std::auto_ptr AutoArch ( - Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg)); - - Archive* arch = AutoArch.get(); - - if (!arch) - return error("Cannot read archive '" + Filename.str() + - "': " + ErrMsg); - if (!arch->isBitcodeArchive()) { - is_native = true; - return false; - } - is_native = false; - - // Save a set of symbols that are not defined by the archive. Since we're - // entering a loop, there's no point searching for these multiple times. This - // variable is used to "set_subtract" from the set of undefined symbols. - std::set NotDefinedByArchive; - - // Save the current set of undefined symbols, because we may have to make - // multiple passes over the archive: - std::set CurrentlyUndefinedSymbols; - - do { - CurrentlyUndefinedSymbols = UndefinedSymbols; - - // Find the modules we need to link into the target module. Note that arch - // keeps ownership of these modules and may return the same Module* from a - // subsequent call. 
- SmallVector Modules; - if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg)) - return error("Cannot find symbols in '" + Filename.str() + - "': " + ErrMsg); - - // If we didn't find any more modules to link this time, we are done - // searching this archive. - if (Modules.empty()) - break; - - // Any symbols remaining in UndefinedSymbols after - // findModulesDefiningSymbols are ones that the archive does not define. So - // we add them to the NotDefinedByArchive variable now. - NotDefinedByArchive.insert(UndefinedSymbols.begin(), - UndefinedSymbols.end()); - - // Loop over all the Modules that we got back from the archive - for (SmallVectorImpl::iterator I=Modules.begin(), E=Modules.end(); - I != E; ++I) { - - // Get the module we must link in. - std::string moduleErrorMsg; - Module* aModule = *I; - if (aModule != NULL) { - if (aModule->MaterializeAll(&moduleErrorMsg)) - return error("Could not load a module: " + moduleErrorMsg); - - verbose(" Linking in module: " + aModule->getModuleIdentifier()); - - // Link it in - if (LinkInModule(aModule, &moduleErrorMsg)) - return error("Cannot link in module '" + - aModule->getModuleIdentifier() + "': " + moduleErrorMsg); - } - } - - // Get the undefined symbols from the aggregate module. This recomputes the - // symbols we still need after the new modules have been linked in. - GetAllUndefinedSymbols(Composite, UndefinedSymbols); - - // At this point we have two sets of undefined symbols: UndefinedSymbols - // which holds the undefined symbols from all the modules, and - // NotDefinedByArchive which holds symbols we know the archive doesn't - // define. There's no point searching for symbols that we won't find in the - // archive so we subtract these sets. - set_subtract(UndefinedSymbols, NotDefinedByArchive); - - // If there's no symbols left, no point in continuing to search the - // archive. - if (UndefinedSymbols.empty()) - break; - } while (CurrentlyUndefinedSymbols != UndefinedSymbols); - - return false; -} diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp deleted file mode 100644 index 8c6ed42..0000000 --- a/lib/Linker/LinkItems.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains routines to handle linking together LLVM bitcode files, -// and to handle annoying things like static libraries. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Linker.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/system_error.h" -using namespace llvm; - -// LinkItems - This function is the main entry point into linking. It takes a -// list of LinkItem which indicates the order the files should be linked and -// how each file should be treated (plain file or with library search). The -// function only links bitcode and produces a result list of items that are -// native objects. -bool -Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) { - // Clear the NativeItems just in case - NativeItems.clear(); - - // For each linkage item ... 
- for (ItemList::const_iterator I = Items.begin(), E = Items.end(); - I != E; ++I) { - if (I->second) { - // Link in the library suggested. - bool is_native = false; - if (LinkInLibrary(I->first, is_native)) - return true; - if (is_native) - NativeItems.push_back(*I); - } else { - // Link in the file suggested - bool is_native = false; - if (LinkInFile(sys::Path(I->first), is_native)) - return true; - if (is_native) - NativeItems.push_back(*I); - } - } - - return false; -} - - -/// LinkInLibrary - links one library into the HeadModule. -/// -bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) { - is_native = false; - // Determine where this library lives. - sys::Path Pathname = FindLib(Lib); - if (Pathname.isEmpty()) - return error("Cannot find library '" + Lib.str() + "'"); - - // If its an archive, try to link it in - std::string Magic; - Pathname.getMagicNumber(Magic, 64); - switch (sys::IdentifyFileType(Magic.c_str(), 64)) { - default: llvm_unreachable("Bad file type identification"); - case sys::Unknown_FileType: - return warning("Supposed library '" + Lib.str() + "' isn't a library."); - - case sys::Bitcode_FileType: - // LLVM ".so" file. - if (LinkInFile(Pathname, is_native)) - return true; - break; - - case sys::Archive_FileType: - if (LinkInArchive(Pathname, is_native)) - return error("Cannot link archive '" + Pathname.str() + "'"); - break; - - case sys::ELF_Relocatable_FileType: - case sys::ELF_SharedObject_FileType: - case sys::Mach_O_Object_FileType: - case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case sys::COFF_FileType: - is_native = true; - break; - } - return false; -} - -/// LinkLibraries - takes the specified library files and links them into the -/// main bitcode object file. -/// -/// Inputs: -/// Libraries - The list of libraries to link into the module. -/// -/// Return value: -/// FALSE - No error. -/// TRUE - Error. -/// -bool Linker::LinkInLibraries(const std::vector &Libraries) { - - // Process the set of libraries we've been provided. - bool is_native = false; - for (unsigned i = 0; i < Libraries.size(); ++i) - if (LinkInLibrary(Libraries[i], is_native)) - return true; - - return false; -} - -/// LinkInFile - opens a bitcode file and links in all objects which -/// provide symbols that are currently undefined. -/// -/// Inputs: -/// File - The pathname of the bitcode file. -/// -/// Outputs: -/// ErrorMessage - A C++ string detailing what error occurred, if any. -/// -/// Return Value: -/// TRUE - An error occurred. -/// FALSE - No errors. -/// -bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { - is_native = false; - - // Check for a file of name "-", which means "read standard input" - if (File.str() == "-") { - std::auto_ptr M; - OwningPtr Buffer; - error_code ec; - if (!(ec = MemoryBuffer::getSTDIN(Buffer))) { - if (!Buffer->getBufferSize()) { - Error = "standard input is empty"; - } else { - M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error)); - if (M.get()) - if (!LinkInModule(M.get(), &Error)) - return false; - } - } - return error("Cannot link stdin: " + ec.message()); - } - - // Determine what variety of file it is. 
- std::string Magic; - if (!File.getMagicNumber(Magic, 64)) - return error("Cannot find linker input '" + File.str() + "'"); - - switch (sys::IdentifyFileType(Magic.c_str(), 64)) { - default: llvm_unreachable("Bad file type identification"); - case sys::Unknown_FileType: - return warning("Ignoring file '" + File.str() + - "' because does not contain bitcode."); - - case sys::Archive_FileType: - // A user may specify an ar archive without -l, perhaps because it - // is not installed as a library. Detect that and link the archive. - if (LinkInArchive(File, is_native)) - return true; - break; - - case sys::Bitcode_FileType: { - verbose("Linking bitcode file '" + File.str() + "'"); - std::auto_ptr M(LoadObject(File)); - if (M.get() == 0) - return error("Cannot load file '" + File.str() + "': " + Error); - if (LinkInModule(M.get(), &Error)) - return error("Cannot link file '" + File.str() + "': " + Error); - - verbose("Linked in file '" + File.str() + "'"); - break; - } - - case sys::ELF_Relocatable_FileType: - case sys::ELF_SharedObject_FileType: - case sys::Mach_O_Object_FileType: - case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case sys::COFF_FileType: - is_native = true; - break; - } - return false; -} - -/// LinkFiles - takes a module and a list of files and links them all together. -/// It locates the file either in the current directory, as its absolute -/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH. -/// -/// Inputs: -/// Files - A vector of sys::Path indicating the LLVM bitcode filenames -/// to be linked. The names can refer to a mixture of pure LLVM -/// bitcode files and archive (ar) formatted files. -/// -/// Return value: -/// FALSE - No errors. -/// TRUE - Some error occurred. -/// -bool Linker::LinkInFiles(const std::vector &Files) { - bool is_native; - for (unsigned i = 0; i < Files.size(); ++i) - if (LinkInFile(Files[i], is_native)) - return true; - return false; -} diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index a30363d..bfd6596 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -112,70 +112,3 @@ Linker::LoadObject(const sys::Path &FN) { Error += ": " + ParseErrorMessage; return std::auto_ptr(); } - -// IsLibrary - Determine if "Name" is a library in "Directory". Return -// a non-empty sys::Path if its found, an empty one otherwise. -static inline sys::Path IsLibrary(StringRef Name, - const sys::Path &Directory) { - - sys::Path FullPath(Directory); - - // Try the libX.a form - FullPath.appendComponent(("lib" + Name).str()); - FullPath.appendSuffix("a"); - if (FullPath.isArchive()) - return FullPath; - - // Try the libX.bca form - FullPath.eraseSuffix(); - FullPath.appendSuffix("bca"); - if (FullPath.isArchive()) - return FullPath; - - // Try the libX.so (or .dylib) form - FullPath.eraseSuffix(); - FullPath.appendSuffix(sys::Path::GetDLLSuffix()); - if (FullPath.isDynamicLibrary()) // Native shared library? - return FullPath; - if (FullPath.isBitcodeFile()) // .so file containing bitcode? - return FullPath; - - // Try libX form, to make it possible to add dependency on the - // specific version of .so, like liblzma.so.1.0.0 - FullPath.eraseSuffix(); - if (FullPath.isDynamicLibrary()) // Native shared library? - return FullPath; - if (FullPath.isBitcodeFile()) // .so file containing bitcode? - return FullPath; - - // Not found .. 
fall through - - // Indicate that the library was not found in the directory. - FullPath.clear(); - return FullPath; -} - -/// FindLib - Try to convert Filename into the name of a file that we can open, -/// if it does not already name a file we can open, by first trying to open -/// Filename, then libFilename.[suffix] for each of a set of several common -/// library suffixes, in each of the directories in LibPaths. Returns an empty -/// Path if no matching file can be found. -/// -sys::Path -Linker::FindLib(StringRef Filename) { - // Determine if the pathname can be found as it stands. - sys::Path FilePath(Filename); - if (FilePath.canRead() && - (FilePath.isArchive() || FilePath.isDynamicLibrary())) - return FilePath; - - // Iterate over the directories in Paths to see if we can find the library - // there. - for (unsigned Index = 0; Index != LibPaths.size(); ++Index) { - sys::Path Directory(LibPaths[Index]); - sys::Path FullPath = IsLibrary(Filename, Directory); - if (!FullPath.isEmpty()) - return FullPath; - } - return sys::Path(); -} -- cgit v1.1 From 167ede898a6105e05fcd9d2ae5679fbf1744018f Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 17 Jan 2013 21:28:46 +0000 Subject: Reverting r171325 & r172363. This was causing a mis-compile on the self-hosted LTO build bots. Okay, here's how to reproduce the problem: 1) Build a Release (or Release+Asserts) version of clang in the normal way. 2) Using the clang & clang++ binaries from (1), build a Release (or Release+Asserts) version of the same sources, but this time enable LTO --- specify the `-flto' flag on the command line. 3) Run the ARC migrator tests: $ arcmt-test --args -triple x86_64-apple-darwin10 -fsyntax-only -x objective-c++ ./src/tools/clang/test/ARCMT/cxx-rewrite.mm You'll see that the output isn't correct (the whitespace is off). The mis-compile is in the function `RewriteBuffer::RemoveText' in the clang/lib/Rewrite/Core/Rewriter.cpp file. When that function and RewriteRope.cpp are compiled with LTO and the `arcmt-test' executable is regenerated, you'll see the error. When those files are not LTO'ed, then the output of the `arcmt-test' is fine. It is *really* hard to get a testcase out of this. I'll file a PR with what I have currently. --- Reverse-merging r172363 into '.': U include/llvm/Analysis/MemoryBuiltins.h U lib/Analysis/MemoryBuiltins.cpp --- Reverse-merging r171325 into '.': U test/Transforms/InstCombine/objsize.ll G include/llvm/Analysis/MemoryBuiltins.h G lib/Analysis/MemoryBuiltins.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172756 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/MemoryBuiltins.cpp | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 1d27a83..0fc0550 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -385,23 +385,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. 
+ if (!SeenInsts.insert(I)) + return unknown(); - if (isa(V) || isa(V)) { - // return cached value or insert unknown in cache if size of V was not - // computed yet in order to avoid recursions in PHis - std::pair CacheVal = - CacheMap.insert(std::make_pair(V, unknown())); - if (!CacheVal.second) - return CacheVal.first->second; - - SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast(V)) - Result = visitGEPOperator(*GEP); - else - Result = visit(cast(*V)); - return CacheMap[V] = Result; + return visitGEPOperator(*GEP); + return visit(*I); } - if (Argument *A = dyn_cast(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast(V)) @@ -415,6 +408,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -548,21 +543,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { - if (PHI.getNumIncomingValues() == 0) - return unknown(); - - SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); - if (!bothKnown(Ret)) - return unknown(); - - // verify that all PHI incoming pointers have the same size and offset - for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { - SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); - if (!bothKnown(EdgeData) || EdgeData != Ret) - return unknown(); - } - return Ret; +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { + // too complex to analyze statically. + return unknown(); } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { -- cgit v1.1 From 1381b9b68cef50ee43dfa684203752c50747c7eb Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 17 Jan 2013 22:05:18 +0000 Subject: [Linker] Drop some now-dead component dependencies. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172759 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LLVMBuild.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt index 2b4c232..0bb26d0 100644 --- a/lib/Linker/LLVMBuild.txt +++ b/lib/Linker/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Linker parent = Libraries -required_libraries = Archive BitReader Core Support TransformUtils +required_libraries = Core Support TransformUtils -- cgit v1.1 From 3da67ca97383f8d305cc732019a51157f9fce290 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 18 Jan 2013 00:50:59 +0000 Subject: [ms-inline asm] Make the error message more generic now that we support the 'SIZE' and 'LENGTH' operators. 
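(For reference, TYPE, SIZE and LENGTH are the MASM-style operand queries handled by ParseIntelOperator below; a rough sketch of MS-style inline asm that reaches this lookup follows. The variable "arr" is a made-up illustration, not taken from the patch, and the semantics are recalled from MASM conventions:

    int arr[4];
    __asm mov eax, LENGTH arr ; number of elements, 4
    __asm mov ebx, SIZE arr   ; total size in bytes, 16
    __asm mov ecx, TYPE arr   ; size of one element in bytes, 4

Each of these operator queries goes through LookupInlineAsmIdentifier, which is why the error text below is no longer TYPE-specific.)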
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 38665f0..dc15a11 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1180,7 +1180,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { bool IsVarDecl; if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, Size, Type, IsVarDecl)) - return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); + return ErrorOperand(Start, "Unable to lookup expr!"); } unsigned CVal; switch(OpKind) { -- cgit v1.1 From 9b6a44712a0caef8617adba857f065c5b48b1f45 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 18 Jan 2013 01:25:25 +0000 Subject: [MC] Fix 80-col violas. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172776 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/DarwinAsmParser.cpp | 130 ++++++++++++++++++++++++------------ 1 file changed, 89 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index c4974e5..7fda7ac 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -51,8 +51,10 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(".pushsection"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(".popsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>( + ".pushsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>( + ".popsection"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( ".secure_log_unique"); @@ -61,52 +63,98 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>(".data_region"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>(".end_data_region"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>( + ".data_region"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>( + ".end_data_region"); // Special section directives. 
AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>( + ".const_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>( + ".constructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>( + ".cstring"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>( + ".destructor"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs"); - 
AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>( + ".fvmlib_init0"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>( + ".fvmlib_init1"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>( + ".lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>( + ".literal16"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>( + ".literal4"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>( + ".literal8"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>( + ".mod_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>( + ".mod_term_func"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>( + ".non_lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>( + ".objc_cat_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>( + ".objc_cat_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>( + ".objc_category"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>( + ".objc_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>( + ".objc_class_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>( + ".objc_class_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>( + ".objc_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>( + ".objc_cls_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>( + ".objc_inst_meth"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>( + ".objc_instance_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>( + ".objc_message_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>( + ".objc_meta_class"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>( + ".objc_meth_var_names"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>( + ".objc_meth_var_types"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>( + ".objc_module_info"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>( + ".objc_protocol"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>( + ".objc_selector_strs"); + AddDirectiveHandler< + &DarwinAsmParser::ParseSectionDirectiveObjCStringObject>( + ".objc_string_object"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>( + ".objc_symbols"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>( + ".picsymbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>( + 
".static_const"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>( + ".static_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>( + ".symbol_stub"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>( + ".thread_init_func"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident"); -- cgit v1.1 From bfdcc70d34f9c2bf3d4815c6d29fd43f01db8b76 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 18 Jan 2013 01:25:33 +0000 Subject: [MC] Expose ParseEscapedString to target AsmParser implementations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 0aca2fa..7d4b4d8 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -427,10 +427,7 @@ private: bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif - - /// ParseEscapedString - Parse the current token as a string which may include - /// escaped characters and return the string contents. - bool ParseEscapedString(std::string &Data); + virtual bool ParseEscapedString(std::string &Data); const MCExpr *ApplyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant); -- cgit v1.1 From cddd236e8a5acb80e9a0e79dc63f6cfaa8205b86 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 18 Jan 2013 01:25:48 +0000 Subject: [MC/Mach-O] Add AsmParser support for .linker_option directive. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172778 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCAsmStreamer.cpp | 10 ++++++++++
 lib/MC/MCParser/DarwinAsmParser.cpp | 30 ++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
(limited to 'lib')

diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index dd5112c..88a7d33 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -145,6 +145,7 @@ public:
 virtual void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol);
 virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
 virtual void EmitDataRegion(MCDataRegionType Kind);
 virtual void EmitThumbFunc(MCSymbol *Func);
@@ -375,6 +376,15 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 EmitEOL();
 }
+void MCAsmStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
+ assert(!Options.empty() && "At least one option is required!");
+ OS << "\t.linker_option \"" << Options[0] << '"';
+ for (ArrayRef<std::string>::iterator it = Options.begin() + 1,
+ ie = Options.end(); it != ie; ++it) {
+ OS << ", " << '"' << *it << '"';
+ }
+}
+
 void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
 MCContext &Ctx = getContext();
 const MCAsmInfo &MAI = Ctx.getAsmInfo();
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 7fda7ac..9029c6d 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -87,6 +87,8 @@ public:
 AddDirectiveHandler<
 &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(
 ".lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLinkerOption>(
+ ".linker_option");
 AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(
 ".literal16");
 AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(
@@ -163,6 +165,7 @@ public:
 bool ParseDirectiveDesc(StringRef, SMLoc);
 bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
 bool ParseDirectiveLsym(StringRef, SMLoc);
+ bool ParseDirectiveLinkerOption(StringRef, SMLoc);
 bool ParseDirectiveSection(StringRef, SMLoc);
 bool ParseDirectivePushSection(StringRef, SMLoc);
 bool ParseDirectivePopSection(StringRef, SMLoc);
@@ -435,6 +438,33 @@ bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
 return Warning(IDLoc, "ignoring directive .load for now");
 }
+/// ParseDirectiveLinkerOption
+/// ::= .linker_option "string" ( , "string" )*
+bool DarwinAsmParser::ParseDirectiveLinkerOption(StringRef IDVal, SMLoc) {
+ SmallVector<std::string, 4> Args;
+ for (;;) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+ std::string Data;
+ if (getParser().ParseEscapedString(Data))
+ return true;
+
+ Args.push_back(Data);
+
+ Lex();
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+
+ getStreamer().EmitLinkerOptions(Args);
+ return false;
+}
+
 /// ParseDirectiveLsym
 /// ::= .lsym identifier , expression
 bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
-- cgit v1.1

From a94c33942373cb504b6e64c95415165907a89d34 Mon Sep 17 00:00:00 2001
From: Daniel Dunbar
Date: Fri, 18 Jan 2013 01:26:07 +0000
Subject: [MC/Mach-O] Add support for linker options in Mach-O files.
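(For reference, the on-disk encoding produced by WriteLinkerOptionsLoadCommand below, sketched as a C-style layout. The field names mirror the macho::LinkerOptionsLoadCommand fields byte-swapped in MachOObject.cpp; the trailing-payload description is inferred from the Write32/WriteBytes calls rather than taken from a header:

    struct linker_options_command_sketch {
      uint32_t Type;   // LCT_LinkerOptions
      uint32_t Size;   // total command size, rounded up to a multiple of 4
      uint32_t Count;  // number of strings that follow
      // Count NUL-terminated strings, then zero padding out to Size.
    };
)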
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172779 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCMachOStreamer.cpp | 5 ++++
 lib/MC/MachObjectWriter.cpp | 56 +++++++++++++++++++++++++++++++++++++++++----
 lib/Object/MachOObject.cpp | 11 +++++++++
 3 files changed, 67 insertions(+), 5 deletions(-)
(limited to 'lib')

diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index f947dda..2c0c97a 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -48,6 +48,7 @@ public:
 virtual void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol);
 virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
 virtual void EmitDataRegion(MCDataRegionType Kind);
 virtual void EmitThumbFunc(MCSymbol *Func);
 virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
@@ -178,6 +179,10 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 }
 }
+void MCMachOStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
+ getAssembler().getLinkerOptions().push_back(Options);
+}
+
 void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) {
 switch (Kind) {
 case MCDR_DataRegion:
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 0098bea..d13c178 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -376,6 +376,39 @@ void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type,
 assert(OS.tell() - Start == macho::LinkeditLoadCommandSize);
 }
+static unsigned ComputeLinkerOptionsLoadCommandSize(
+ const std::vector<std::string> &Options)
+{
+ unsigned Size = sizeof(macho::LinkerOptionsLoadCommand);
+ for (unsigned i = 0, e = Options.size(); i != e; ++i)
+ Size += Options[i].size() + 1;
+ return RoundUpToAlignment(Size, 4);
+}
+
+void MachObjectWriter::WriteLinkerOptionsLoadCommand(
+ const std::vector<std::string> &Options)
+{
+ unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options);
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_LinkerOptions);
+ Write32(Size);
+ Write32(Options.size());
+ uint64_t BytesWritten = 0;
+ for (unsigned i = 0, e = Options.size(); i != e; ++i) {
+ // Write each string, including the null byte.
+ const std::string &Option = Options[i];
+ WriteBytes(Option.c_str(), Option.size() + 1);
+ BytesWritten += Option.size() + 1;
+ }
+
+ // Pad to a multiple of 4.
+ WriteBytes("", OffsetToAlignment(BytesWritten, 4));
+
+ assert(OS.tell() - Start == Size);
+}
+
 void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
 const MCAsmLayout &Layout,
@@ -693,6 +726,13 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
 macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
 macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+ // Add the data-in-code load command size, if used.
+ unsigned NumDataRegions = Asm.getDataRegions().size();
+ if (NumDataRegions) {
+ ++NumLoadCommands;
+ LoadCommandsSize += macho::LinkeditLoadCommandSize;
+ }
+
 // Add the symbol table load command sizes, if used.
 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
 UndefinedSymbolData.size();
@@ -702,13 +742,14 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
 macho::DysymtabLoadCommandSize);
 }
- // Add the data-in-code load command size, if used.
- unsigned NumDataRegions = Asm.getDataRegions().size();
- if (NumDataRegions) {
+ // Add the linker option load commands sizes.
+ const std::vector<std::vector<std::string> > &LinkerOptions =
+ Asm.getLinkerOptions();
+ for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
 ++NumLoadCommands;
- LoadCommandsSize += macho::LinkeditLoadCommandSize;
+ LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i]);
 }
-
+
 // Compute the total size of the section data, as well as its file size and vm
 // size.
 uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
@@ -799,6 +840,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
 IndirectSymbolOffset, NumIndirectSymbols);
 }
+ // Write the linker options load commands.
+ for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
+ WriteLinkerOptionsLoadCommand(LinkerOptions[i]);
+ }
+
 // Write the actual section data.
 for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
 it != ie; ++it) {
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 529bdf9..c9c341a 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -259,6 +259,17 @@ void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
 }
 template<>
+void SwapStruct(macho::LinkerOptionsLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.Count);
+}
+void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
 void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
 SwapValue(Value.Index);
 }
-- cgit v1.1

From 268e0ffa78483512ad65002cb2081ab88b0f52a8 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Fri, 18 Jan 2013 03:08:39 +0000
Subject: Fixed 80+ violation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172782 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/ObjCARC.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'lib')

diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 8dae235..e93ca27 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -2442,8 +2442,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
 InstructionClass Class = GetBasicInstructionClass(Inst);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: " << Class
- << "; " << *Inst << "\n");
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
+ << Class << "; " << *Inst << "\n");
 switch (Class) {
 default: break;
-- cgit v1.1

From 37d093f0b0e4b4d1c49efbf2bdcc9827527e3b9f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 18 Jan 2013 05:09:16 +0000
Subject: Remove trailing whitespace.
Remove new lines between closing brace and 'else'

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172784 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
(limited to 'lib')

diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 8bfcc80..1bbedb8 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -304,12 +304,12 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
 return V;
 }
-
+
 if (isa<ConstantAggregateZero>(V)) {
 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
 return V;
 }
-
+
 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
 // If this is an insert of an extract from some other vector, include it.
 Value *VecOp = IEI->getOperand(0);
@@ -609,7 +609,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 eltMask = -1;
 } else if (Mask[i] < (int)LHSWidth) {
 // This element is from left hand side vector operand.
- //
+ //
 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
 // new mask value for the element.
 if (newLHS != LHS) {
@@ -618,8 +618,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 // with a -1 mask value.
 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
 eltMask = -1;
- }
- else
+ } else
 eltMask = Mask[i];
 } else {
 // This element is from right hand side vector operand
@@ -639,8 +638,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 && "should have been check above");
 eltMask = -1;
 }
- }
- else
+ } else
 eltMask = Mask[i]-LHSWidth;
 // If LHS's width is changed, shift the mask value accordingly.
-- cgit v1.1

From 081c29b25696006bb72a7ac1035e05f8f935513f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 18 Jan 2013 05:30:07 +0000
Subject: Check for less than 0 in shuffle mask instead of -1.

It's more consistent with other code related to shuffles and easier to
implement in compiled code.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172788 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib')

diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1bbedb8..4f71db1 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -604,7 +604,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 // ShuffleVectorInst is equivalent to the original one.
 for (unsigned i = 0; i < VWidth; ++i) {
 int eltMask;
- if (Mask[i] == -1) {
+ if (Mask[i] < 0) {
 // This element is an undef value.
 eltMask = -1;
 } else if (Mask[i] < (int)LHSWidth) {
-- cgit v1.1

From 5a529e4f86bc3c76ba086662d7c4ef2d1f85ce6f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 18 Jan 2013 06:44:29 +0000
Subject: Make more use of is128BitVector/is256BitVector in place of
 getSizeInBits() == 128/256.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172792 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 66 ++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a8294b6..a1b2b2a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3361,8 +3361,8 @@ static bool isPSHUFLWMask(ArrayRef Mask, EVT VT, bool HasInt256) { /// is suitable for input to PALIGNR. static bool isPALIGNRMask(ArrayRef Mask, EVT VT, const X86Subtarget *Subtarget) { - if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) || - (VT.getSizeInBits() == 256 && !Subtarget->hasInt256())) + if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) || + (VT.is256BitVector() && !Subtarget->hasInt256())) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3451,7 +3451,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl &Mask, /// reverse of what x86 shuffles want. static bool isSHUFPMask(ArrayRef Mask, EVT VT, bool HasFp256, bool Commuted = false) { - if (!HasFp256 && VT.getSizeInBits() == 256) + if (!HasFp256 && VT.is256BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3636,7 +3636,7 @@ static bool isUNPCKLMask(ArrayRef Mask, EVT VT, assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3675,7 +3675,7 @@ static bool isUNPCKHMask(ArrayRef Mask, EVT VT, assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3706,14 +3706,14 @@ static bool isUNPCKHMask(ArrayRef Mask, EVT VT, /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, - bool HasInt256) { +static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { unsigned NumElts = VT.getVectorNumElements(); + bool Is256BitVec = VT.is256BitVector(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (Is256BitVec && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3721,7 +3721,7 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, // FIXME: Need a better way to get rid of this, there's no latency difference // between UNPCKLPD and MOVDDUP, the later should always be checked first and // the former later. We should also remove the "_undef" special mask. - if (NumElts == 4 && VT.getSizeInBits() == 256) + if (NumElts == 4 && Is256BitVec) return false; // Handle 128 and 256-bit vector lengths. 
AVX defines UNPCK* to operate @@ -3755,7 +3755,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3871,7 +3871,7 @@ static bool isVPERMILPMask(ArrayRef Mask, EVT VT, bool HasFp256) { unsigned NumElts = VT.getVectorNumElements(); // Only match 256-bit with 32/64-bit types - if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8)) + if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8)) return false; unsigned NumLanes = VT.getSizeInBits()/128; @@ -3927,8 +3927,8 @@ static bool isMOVSHDUPMask(ArrayRef Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() == 128 && NumElems != 4) || - (VT.getSizeInBits() == 256 && NumElems != 8)) + if ((VT.is128BitVector() && NumElems != 4) || + (VT.is256BitVector() && NumElems != 8)) return false; // "i+1" is the value the indexed mask element must have @@ -3950,8 +3950,8 @@ static bool isMOVSLDUPMask(ArrayRef Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() == 128 && NumElems != 4) || - (VT.getSizeInBits() == 256 && NumElems != 8)) + if ((VT.is128BitVector() && NumElems != 4) || + (VT.is256BitVector() && NumElems != 8)) return false; // "i" is the value the indexed mask element must have @@ -4358,12 +4358,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - unsigned Size = VT.getSizeInBits(); // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. 
SDValue Vec; - if (Size == 128) { // SSE + if (VT.is128BitVector()) { // SSE if (Subtarget->hasSSE2()) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -4371,7 +4370,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } - } else if (Size == 256) { // AVX + } else if (VT.is256BitVector()) { // AVX if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; @@ -4396,11 +4395,10 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - unsigned Size = VT.getSizeInBits(); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (Size == 256) { + if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); @@ -4408,7 +4406,7 @@ static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); } - } else if (Size == 128) { + } else if (VT.is128BitVector()) { Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else llvm_unreachable("Unexpected vector type"); @@ -4487,14 +4485,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { EVT VT = V.getValueType(); DebugLoc dl = V.getDebugLoc(); - unsigned Size = VT.getSizeInBits(); - if (Size == 128) { + if (VT.is128BitVector()) { V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V); int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32), &SplatMask[0]); - } else if (Size == 256) { + } else if (VT.is256BitVector()) { // To use VPERMILPS to splat scalars, the second half of indicies must // refer to the higher part, which is a duplication of the lower one, // because VPERMILPS can only handle in-lane permutations. @@ -4518,14 +4515,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { int EltNo = SV->getSplatIndex(); int NumElems = SrcVT.getVectorNumElements(); - unsigned Size = SrcVT.getSizeInBits(); + bool Is256BitVec = SrcVT.is256BitVector(); - assert(((Size == 128 && NumElems > 4) || Size == 256) && - "Unknown how to promote splat for type"); + assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) && + "Unknown how to promote splat for type"); // Extract the 128-bit part containing the splat element and update // the splat element index when it refers to the higher register. - if (Size == 256) { + if (Is256BitVec) { V1 = Extract128BitVector(V1, EltNo, DAG, dl); if (EltNo >= NumElems/2) EltNo -= NumElems/2; @@ -4542,7 +4539,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Recreate the 256-bit vector and place the same 128-bit vector // into the low and high part. 
This is necessary because we want // to use VPERM* to shuffle the vectors - if (Size == 256) { + if (Is256BitVec) { V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1); } @@ -6672,7 +6669,6 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // Handle splat operations if (SVOp->isSplat()) { unsigned NumElem = VT.getVectorNumElements(); - int Size = VT.getSizeInBits(); // Use vbroadcast whenever the splat comes from a foldable load SDValue Broadcast = LowerVectorBroadcast(Op, DAG); @@ -6680,8 +6676,8 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { return Broadcast; // Handle splats by matching through known shuffle masks - if ((Size == 128 && NumElem <= 4) || - (Size == 256 && NumElem <= 8)) + if ((VT.is128BitVector() && NumElem <= 4) || + (VT.is256BitVector() && NumElem <= 8)) return SDValue(); // All remaning splats are promoted to target supported vector shuffles. @@ -15970,7 +15966,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); - if (VT.getSizeInBits() != 256) + if (!VT.is256BitVector()) return SDValue(); assert((N->getOpcode() == ISD::ANY_EXTEND || @@ -15979,7 +15975,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow->getValueType(0); - if (NarrowVT.getSizeInBits() != 128) + if (!NarrowVT.is128BitVector()) return SDValue(); if (Narrow->getOpcode() != ISD::XOR && @@ -17075,7 +17071,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, } } - if (VT.isVector() && VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; -- cgit v1.1 From 0a38861364c30c73b64ae8d8ef326f655d22fefd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Jan 2013 06:50:59 +0000 Subject: Spelling fix: extened->extended. Trailing whitespace in same function. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172793 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a1b2b2a..5999b15 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16967,29 +16967,30 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); - + if (!VT.isVector()) return SDValue(); SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); DebugLoc dl = N->getDebugLoc(); - unsigned ExtenedEltSize = VT.getVectorElementType().getSizeInBits(); + unsigned ExtendedEltSize = VT.getVectorElementType().getSizeInBits(); // Split SIGN_EXTEND operation to use vmovsx instruction when possible if (InVT == MVT::v8i8) { - if (ExtenedEltSize > 16 && !Subtarget->hasInt256()) + if (ExtendedEltSize > 16 && !Subtarget->hasInt256()) In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In); - if (ExtenedEltSize > 32) + if (ExtendedEltSize > 32) In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In); return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); } if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) && - ExtenedEltSize > 32 && !Subtarget->hasInt256()) { + ExtendedEltSize > 32 && !Subtarget->hasInt256()) { In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); } + if (!DCI.isBeforeLegalizeOps()) return SDValue(); -- cgit v1.1 From e6d8fa7d0b4352902886930debe459b9f477303e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Jan 2013 07:27:20 +0000 Subject: Minor formatting fix. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172795 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5999b15..4d982a4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5854,7 +5854,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, int EltIdx = MaskVals[i] * 2; int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx; int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1; - pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8)); pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8)); } V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1); -- cgit v1.1 From 5141d97d3ee9afca936bc870e67c53e1ed05f790 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Jan 2013 08:41:28 +0000 Subject: Calculate vector element size more directly for VINSERTF128/VEXTRACTF128 immediate handling. Also use MVT since this only called on legal types during pattern matching. 
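(A worked instance of the check in the diff below: for a 128-bit extract of v4i32, ElSize is 32, so the predicate (Index * ElSize) % 128 == 0 accepts only Index 0 and Index 4, the two 128-bit lane boundaries of a 256-bit source vector, and rejects every other index.)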
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172797 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
(limited to 'lib')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4d982a4..2b6ff36 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4011,9 +4011,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) {
 uint64_t Index = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
 bool Result = (Index * ElSize) % 128 == 0;
 return Result;
@@ -4030,9 +4029,8 @@ bool X86::isVINSERTF128Index(SDNode *N) {
 uint64_t Index = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
 bool Result = (Index * ElSize) % 128 == 0;
 return Result;
-- cgit v1.1

From ae36eccdfbad53a1e76ca263b7540b84d50d3524 Mon Sep 17 00:00:00 2001
From: Will Dietz
Date: Fri, 18 Jan 2013 11:29:21 +0000
Subject: Move Blacklist.h to include/ to enable use from clang.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172806 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Instrumentation/AddressSanitizer.cpp | 2 +-
 lib/Transforms/Instrumentation/BlackList.cpp | 19 +++----
 lib/Transforms/Instrumentation/BlackList.h | 58 ----------------------
 lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +-
 lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2 +-
 5 files changed, 13 insertions(+), 70 deletions(-)
 delete mode 100644 lib/Transforms/Instrumentation/BlackList.h
(limited to 'lib')

diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index e733500..1aad842 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -16,7 +16,7 @@
 #define DEBUG_TYPE "asan"
 #include "llvm/Transforms/Instrumentation.h"
-#include "BlackList.h"
+#include "llvm/Transforms/Utils/BlackList.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index 4fcbea4..d6b2983 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -13,7 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
-#include "BlackList.h"
+#include "llvm/Transforms/Utils/BlackList.h"
+
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -78,21 +79,21 @@ BlackList::BlackList(const StringRef Path) {
 }
 // Iterate through each of the prefixes, and create Regexs for them.
- for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end();
- I != E; ++I) {
+ for (StringMap<std::string>::const_iterator I = Regexps.begin(),
+ E = Regexps.end(); I != E; ++I) {
 Entries[I->getKey()] = new Regex(I->getValue());
 }
 }
-bool BlackList::isIn(const Function &F) {
+bool BlackList::isIn(const Function &F) const {
 return isIn(*F.getParent()) || inSection("fun", F.getName());
 }
-bool BlackList::isIn(const GlobalVariable &G) {
+bool BlackList::isIn(const GlobalVariable &G) const {
 return isIn(*G.getParent()) || inSection("global", G.getName());
 }
-bool BlackList::isIn(const Module &M) {
+bool BlackList::isIn(const Module &M) const {
 return inSection("src", M.getModuleIdentifier());
 }
@@ -107,14 +108,14 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
 return "";
 }
-bool BlackList::isInInit(const GlobalVariable &G) {
+bool BlackList::isInInit(const GlobalVariable &G) const {
 return (isIn(*G.getParent()) ||
 inSection("global-init", G.getName()) ||
 inSection("global-init-type", GetGVTypeString(G)));
 }
-bool BlackList::inSection(const StringRef Section, const StringRef Query) {
- StringMap<Regex*>::iterator I = Entries.find(Section);
+bool BlackList::inSection(const StringRef Section, const StringRef Query) const {
+ StringMap<Regex*>::const_iterator I = Entries.find(Section);
 if (I == Entries.end()) return false;
 Regex *FunctionRegex = I->getValue();
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
deleted file mode 100644
index ee18a98..0000000
--- a/lib/Transforms/Instrumentation/BlackList.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions or global
-// variables based on a user-supplied blacklist.
-//
-// The blacklist disables instrumentation of various functions and global
-// variables. Each line contains a prefix, followed by a wild card expression.
-// Empty lines and lines starting with "#" are ignored.
-// ---
-// # Blacklisted items:
-// fun:*_ZN4base6subtle*
-// global:*global_with_bad_access_or_initialization*
-// global-init:*global_with_initialization_issues*
-// global-init-type:*Namespace::ClassName*
-// src:file_with_tricky_code.cc
-// ---
-// Note that the wild card is in fact an llvm::Regex, but * is automatically
-// replaced with .*
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "llvm/ADT/StringMap.h"
-
-namespace llvm {
-class Function;
-class GlobalVariable;
-class Module;
-class Regex;
-class StringRef;
-
-class BlackList {
- public:
- BlackList(const StringRef Path);
- // Returns whether either this function or it's source file are blacklisted.
- bool isIn(const Function &F);
- // Returns whether either this global or it's source file are blacklisted.
- bool isIn(const GlobalVariable &G);
- // Returns whether this module is blacklisted by filename.
- bool isIn(const Module &M);
- // Returns whether a global should be excluded from initialization checking.
- bool isInInit(const GlobalVariable &G); - private: - StringMap Entries; - - bool inSection(const StringRef Section, const StringRef Query); -}; - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index db0de4d..2d8ca67 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -71,7 +71,7 @@ #define DEBUG_TYPE "msan" #include "llvm/Transforms/Instrumentation.h" -#include "BlackList.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 29d2ece..463ca66 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -22,7 +22,7 @@ #define DEBUG_TYPE "tsan" #include "llvm/Transforms/Instrumentation.h" -#include "BlackList.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -- cgit v1.1 From fe7d2b61f297d61f2553dd7d811ac32d35d25997 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Fri, 18 Jan 2013 12:49:06 +0000 Subject: 80 columns git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172813 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/BlackList.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp index d6b2983..a3cf84c 100644 --- a/lib/Transforms/Instrumentation/BlackList.cpp +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -114,7 +114,8 @@ bool BlackList::isInInit(const GlobalVariable &G) const { inSection("global-init-type", GetGVTypeString(G))); } -bool BlackList::inSection(const StringRef Section, const StringRef Query) const { +bool BlackList::inSection(const StringRef Section, + const StringRef Query) const { StringMap::const_iterator I = Entries.find(Section); if (I == Entries.end()) return false; -- cgit v1.1 From 8d3c9014f922048fcb2e1f1db2150ea9f0a118ad Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 18 Jan 2013 15:03:03 +0000 Subject: The IR linker still depends on the bitcode reader. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172824 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LLVMBuild.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt index 0bb26d0..360c254 100644 --- a/lib/Linker/LLVMBuild.txt +++ b/lib/Linker/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Linker parent = Libraries -required_libraries = Core Support TransformUtils +required_libraries = BitReader Core Support TransformUtils -- cgit v1.1 From 6d49b680be6e24b547e6910c2b64914913915084 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 18 Jan 2013 19:37:00 +0000 Subject: [MC/Mach-O] Implement integrated assembler support for linker options. - Also, fixup syntax errors in LangRef and missing newline in the MCAsmStreamer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172837 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 33 ++++++++++++++++++++++------ lib/MC/MCAsmStreamer.cpp | 1 + 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 76c2546..1170bf2 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -406,14 +406,14 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// -/// emitModuleFlags - Emit the module flags that specify the garbage collection -/// information. +/// emitModuleFlags - Perform code emission for module flags. void TargetLoweringObjectFileMachO:: emitModuleFlags(MCStreamer &Streamer, ArrayRef ModuleFlags, Mangler *Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; + MDNode *LinkerOptions = 0; StringRef SectionVal; for (ArrayRef::iterator @@ -427,14 +427,33 @@ emitModuleFlags(MCStreamer &Streamer, StringRef Key = MFE.Key->getString(); Value *Val = MFE.Val; - if (Key == "Objective-C Image Info Version") + if (Key == "Objective-C Image Info Version") { VersionVal = cast(Val)->getZExtValue(); - else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated") + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated") { ImageInfoFlags |= cast(Val)->getZExtValue(); - else if (Key == "Objective-C Image Info Section") + } else if (Key == "Objective-C Image Info Section") { SectionVal = cast(Val)->getString(); + } else if (Key == "Linker Options") { + LinkerOptions = cast(Val); + } + } + + // Emit the linker options if present. + if (LinkerOptions) { + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast(LinkerOptions->getOperand(i)); + SmallVector StrOptions; + + // Convert to strings. + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast(MDOptions->getOperand(ii)); + StrOptions.push_back(MDOption->getString()); + } + + Streamer.EmitLinkerOptions(StrOptions); + } } // The section is mandatory. If we don't have it, then we don't have GC info. diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 88a7d33..7191947 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -383,6 +383,7 @@ void MCAsmStreamer::EmitLinkerOptions(ArrayRef Options) { ie = Options.end(); it != ie; ++it) { OS << ", " << '"' << *it << '"'; } + OS << "\n"; } void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) { -- cgit v1.1 From 92d8cc636266042470532449bb1f83fb743111cd Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 18 Jan 2013 19:37:11 +0000 Subject: [Linker] Kill Linker::LoadObject which is dead, and drop the BitReader dependency again. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172838 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LLVMBuild.txt | 2 +- lib/Linker/Linker.cpp | 23 ----------------------- 2 files changed, 1 insertion(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt index 360c254..0bb26d0 100644 --- a/lib/Linker/LLVMBuild.txt +++ b/lib/Linker/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Linker parent = Libraries -required_libraries = BitReader Core Support TransformUtils +required_libraries = Core Support TransformUtils diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index bfd6596..c8ea8ff 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -89,26 +89,3 @@ Linker::releaseModule() { Flags = 0; return result; } - -// LoadObject - Read in and parse the bitcode file named by FN and return the -// module it contains (wrapped in an auto_ptr), or auto_ptr() and set -// Error if an error occurs. -std::auto_ptr -Linker::LoadObject(const sys::Path &FN) { - std::string ParseErrorMessage; - Module *Result = 0; - - OwningPtr Buffer; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer)) - ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": " - + ec.message(); - else - Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage); - - if (Result) - return std::auto_ptr(Result); - Error = "Bitcode file '" + FN.str() + "' could not be loaded"; - if (ParseErrorMessage.size()) - Error += ": " + ParseErrorMessage; - return std::auto_ptr(); -} -- cgit v1.1 From b52677c0ad641626beec8c0c6893116808c0dbe1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 18 Jan 2013 19:45:22 +0000 Subject: Silence GCC warning about dropping off a non-void function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172839 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index e93ca27..c3a0fd6 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -219,6 +219,7 @@ namespace { case IC_None: return OS << "IC_None"; } + llvm_unreachable("Unknown instruction class!"); } } -- cgit v1.1 From e72fac60e3dbcf14ec68cedc1e86feafec1652eb Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Fri, 18 Jan 2013 20:15:06 +0000 Subject: This is a resubmittal. For some reason it broke the bots yesterday but I cannot reproduce the problem and have scrubbed my sources and even tested with llvm-lit -v --vg. Removal of redundant code and formatting fixes. 
Contributors: Jack Carter/Vladimir Medic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172842 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 60 ++++++++++++++++++-------------------- lib/Target/Mips/MipsInstrInfo.td | 44 ++++++++++++++++++---------- 2 files changed, 57 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index bbeb649..2b2c6b7 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -117,8 +117,9 @@ def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>; let Predicates = [HasMips64r2, HasStdEnc], DecoderNamespace = "Mips64" in { def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>, - SRA_FM<0x3a, 1>; - def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>, SRLV_FM<0x16, 1>; + SRA_FM<0x3a, 1>; + def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>, + SRLV_FM<0x16, 1>; } let DecoderNamespace = "Mips64" in { @@ -173,8 +174,10 @@ def TAILCALL64_R : JumpFR, MTLO_FM<8>, IsTailCall; let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. -def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; -def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; +def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, + MULT_FM<0, 0x1c>; +def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, + MULT_FM<0, 0x1d>; def DSDIV : Div, MULT_FM<0, 0x1e>; def DUDIV : Div, @@ -305,20 +308,21 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (DADDu CPU64RegsOpnd:$dst, +def : InstAlias<"move $dst, $src", (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src,ZERO_64)>, - Requires<[HasMips64]>; + Requires<[HasMips64]>; def : InstAlias<"and $rs, $rt, $imm", (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, - Requires<[HasMips64]>; + Requires<[HasMips64]>; def : InstAlias<"slt $rs, $rt, $imm", (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm)>, - Requires<[HasMips64]>; + Requires<[HasMips64]>; def : InstAlias<"xor $rs, $rt, $imm", (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, - Requires<[HasMips64]>; -def : InstAlias<"not $rt, $rs", (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64)>, - Requires<[HasMips64]>; + Requires<[HasMips64]>; +def : InstAlias<"not $rt, $rs", + (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64)>, + Requires<[HasMips64]>; def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs)>, Requires<[HasMips64]>; def : InstAlias<"daddu $rs, $rt, $imm", (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; @@ -326,35 +330,29 @@ def : InstAlias<"dadd $rs, $rt, $imm", (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; /// Move between CPU and coprocessor registers + let DecoderNamespace = "Mips64" in { -def MFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), - "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; -def MTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), - "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; -def MFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), - "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; -def MTC2_3OP64 : MFC3OP<(outs 
CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), - "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; -def DMFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), +def DMFC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt), + (ins CPU64RegsOpnd:$rd, uimm16:$sel), "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>; -def DMTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), +def DMTC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel), + (ins CPU64RegsOpnd:$rt), "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>; -def DMFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), +def DMFC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt), + (ins CPU64RegsOpnd:$rd, uimm16:$sel), "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>; -def DMTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), +def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel), + (ins CPU64RegsOpnd:$rt), "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>; } + // Two operand (implicit 0 selector) versions: -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; -def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; def : InstAlias<"dmfc0 $rt, $rd", - (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; + (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0)>; def : InstAlias<"dmtc0 $rt, $rd", - (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; + (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt)>; def : InstAlias<"dmfc2 $rt, $rd", - (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; + (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0)>; def : InstAlias<"dmtc2 $rt, $rd", - (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; + (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt)>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3ed8f93..9085a26 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -506,7 +506,8 @@ class SetCC_I: InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>; + [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], + IIAlu, FrmI>; // Jump class JumpFJ, def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>; def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>; -def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, ADDI_FM<0xc>; -def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, ADDI_FM<0xd>; -def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; +def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, + ADDI_FM<0xc>; +def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, + ADDI_FM<0xd>; +def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, + ADDI_FM<0xe>; def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) @@ -793,9 +797,12 @@ def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, SRA_FM<0, 0>; -def SRL : shift_rotate_imm<"srl", shamt, 
CPURegsOpnd, srl, immZExt5>, SRA_FM<2, 0>; -def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, SRA_FM<3, 0>; +def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, + SRA_FM<0, 0>; +def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, + SRA_FM<2, 0>; +def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, + SRA_FM<3, 0>; def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; @@ -863,7 +870,8 @@ def RET : RetBase, MTLO_FM<8>; /// Multiply and Divide Instructions. def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; -def SDIV : Div, MULT_FM<0, 0x1a>; +def SDIV : Div, + MULT_FM<0, 0x1a>; def UDIV : Div, MULT_FM<0, 0x1b>; @@ -905,16 +913,20 @@ def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>; def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers -def MFC0_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), +def MFC0_3OP : MFC3OP<(outs CPURegsOpnd:$rt), + (ins CPURegsOpnd:$rd, uimm16:$sel), "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; -def MTC0_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), +def MTC0_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel), + (ins CPURegsOpnd:$rt), "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; -def MFC2_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), +def MFC2_3OP : MFC3OP<(outs CPURegsOpnd:$rt), + (ins CPURegsOpnd:$rd, uimm16:$sel), "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; -def MTC2_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), +def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel), + (ins CPURegsOpnd:$rt), "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; //===----------------------------------------------------------------------===// @@ -939,10 +951,10 @@ def : InstAlias<"slt $rs, $rt, $imm", def : InstAlias<"xor $rs, $rt, $imm", (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>, Requires<[NotMips64]>; -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>; -def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>; +def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0)>; +def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt)>; +def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0)>; +def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt)>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions -- cgit v1.1 From 956f13440a4aa0297606a4412f4aa091d931592a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 18 Jan 2013 21:11:39 +0000 Subject: Push some more methods down to hide the use of the Attribute class. Because the Attribute class is going to stop representing a collection of attributes, limit the use of it as an aggregate in favor of using AttributeSet. This replaces some of the uses for querying the function attributes. 
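To make the new query pattern concrete, here is a minimal sketch contrasting the two styles (illustrative only, not part of this patch; 'Fn' and both helper names are hypothetical, while the calls follow the API shapes visible in the diffs below):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Old style: materialize the function attributes as one aggregate
// Attribute object, then query that aggregate.
bool hasReadNoneOld(const llvm::Function &Fn) {
  llvm::Attribute FnAttrs = Fn.getAttributes().getFnAttributes();
  return FnAttrs.hasAttribute(llvm::Attribute::ReadNone);
}

// New style: query the AttributeSet directly at the function index,
// never forming an aggregate Attribute at all.
bool hasReadNoneNew(const llvm::Function &Fn) {
  return Fn.getAttributes().hasAttribute(
      llvm::AttributeSet::FunctionIndex, llvm::Attribute::ReadNone);
}

Both helpers answer the same question; the second form is the one the verifier and the IPO passes are moved toward below.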
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172844 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 23 +++++++- lib/IR/Verifier.cpp | 78 ++++++++++++++++--------- lib/Transforms/IPO/ArgumentPromotion.cpp | 4 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 2 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 4 +- lib/Transforms/Utils/CloneFunction.cpp | 6 +- 6 files changed, 77 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 5024a63..173782e 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -255,9 +255,19 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) assert(AWI && "Cannot find index in attribute set!"); - /// FIXME: This will be modified in the future. Basically, the - /// AttributeWithIndex class will contain the + uint64_t Mask = AWI->Attrs.Raw(); + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) { + if (uint64_t A = (Mask & AttributeImpl::getAttrMask(I))) { + Attrs.insert(I); + + if (I == Attribute::Alignment) + Alignment = 1ULL << ((A >> 16) - 1); + else if (I == Attribute::StackAlignment) + StackAlignment = 1ULL << ((A >> 26)-1); + } + } } void AttrBuilder::clear() { @@ -610,6 +620,10 @@ std::string AttributeSet::getAsString(unsigned Index) const { return getAttributes(Index).getAsString(); } +unsigned AttributeSet::getParamAlignment(unsigned Idx) const { + return getAttributes(Idx).getAlignment(); +} + unsigned AttributeSet::getStackAlignment(unsigned Index) const { return getAttributes(Index).getStackAlignment(); } @@ -646,6 +660,11 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { return false; } +AttributeSet AttributeSet::addFnAttributes(LLVMContext &C, + AttributeSet Attrs) const { + return addAttr(C, FunctionIndex, getAttributes(FunctionIndex)); +} + AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, Attribute Attrs) const { Attribute OldAttrs = getAttributes(Idx); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 49821f2..07176fe 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -739,41 +739,61 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, Assert1(Attr.Index == 1, "Attribute sret is not on first parameter!", V); } - Attribute FAttrs = Attrs.getFnAttributes(); - AttrBuilder NotFn(FAttrs); + if (!Attrs.hasAttributes(AttributeSet::FunctionIndex)) + return; + + AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); NotFn.removeFunctionOnlyAttrs(); Assert1(!NotFn.hasAttributes(), "Attribute '" + Attribute::get(V->getContext(), NotFn).getAsString() + "' do not apply to the function!", V); // Check for mutually incompatible attributes. 
- Assert1(!((FAttrs.hasAttribute(Attribute::ByVal) && - FAttrs.hasAttribute(Attribute::Nest)) || - (FAttrs.hasAttribute(Attribute::ByVal) && - FAttrs.hasAttribute(Attribute::StructRet)) || - (FAttrs.hasAttribute(Attribute::Nest) && - FAttrs.hasAttribute(Attribute::StructRet))), "Attributes " - "'byval, nest, and sret' are incompatible!", V); - - Assert1(!((FAttrs.hasAttribute(Attribute::ByVal) && - FAttrs.hasAttribute(Attribute::Nest)) || - (FAttrs.hasAttribute(Attribute::ByVal) && - FAttrs.hasAttribute(Attribute::InReg)) || - (FAttrs.hasAttribute(Attribute::Nest) && - FAttrs.hasAttribute(Attribute::InReg))), "Attributes " - "'byval, nest, and inreg' are incompatible!", V); - - Assert1(!(FAttrs.hasAttribute(Attribute::ZExt) && - FAttrs.hasAttribute(Attribute::SExt)), "Attributes " - "'zeroext and signext' are incompatible!", V); - - Assert1(!(FAttrs.hasAttribute(Attribute::ReadNone) && - FAttrs.hasAttribute(Attribute::ReadOnly)), "Attributes " - "'readnone and readonly' are incompatible!", V); - - Assert1(!(FAttrs.hasAttribute(Attribute::NoInline) && - FAttrs.hasAttribute(Attribute::AlwaysInline)), "Attributes " - "'noinline and alwaysinline' are incompatible!", V); + Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ByVal) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::Nest)) || + (Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ByVal) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StructRet)) || + (Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::Nest) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StructRet))), + "Attributes 'byval, nest, and sret' are incompatible!", V); + + Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ByVal) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::Nest)) || + (Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ByVal) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InReg)) || + (Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::Nest) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InReg))), + "Attributes 'byval, nest, and inreg' are incompatible!", V); + + Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ZExt) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::SExt)), + "Attributes 'zeroext and signext' are incompatible!", V); + + Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReadNone) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReadOnly)), + "Attributes 'readnone and readonly' are incompatible!", V); + + Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoInline) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)), + "Attributes 'noinline and alwaysinline' are incompatible!", V); } static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 385544a..15a479e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -592,7 +592,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. 
attrs = PAL.getFnAttributes(); - if (attrs.hasAttributes()) + if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, attrs)); @@ -722,7 +722,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. attrs = CallPAL.getFnAttributes(); - if (attrs.hasAttributes()) + if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, attrs)); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4757ce8..5b5a015 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -277,7 +277,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); Attribute FnAttrs = PAL.getFnAttributes(); - if (FnAttrs.hasAttributes()) + if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, FnAttrs)); PAL = AttributeSet::get(Fn.getContext(), AttributesVec); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d17879b..63e452b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1176,7 +1176,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } Attribute FnAttrs = CallerPAL.getFnAttributes(); - if (FnAttrs.hasAttributes()) + if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex)) attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, FnAttrs)); @@ -1320,7 +1320,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add any function attributes. Attr = Attrs.getFnAttributes(); - if (Attr.hasAttributes()) + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, Attr)); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index ccc3eae..ad753ab 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -103,10 +103,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, OldFunc->getAttributes() .getRetAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldFunc->getAttributes() - .getFnAttributes())); + .addFnAttributes(NewFunc->getContext(), + OldFunc->getAttributes())); } -- cgit v1.1 From 935a91540b7aa8f29ea48fe2df657db0ce5b7d5d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 18 Jan 2013 21:15:50 +0000 Subject: R600: Optimize and clean up KILL on SI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't insert the KILL optimization code if we don't have a kill instruction at all. 
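As a condensed model of where the guard code now goes, consider this self-contained sketch (a toy opcode enum stands in for real MachineInstrs; all names here are assumptions, and the real logic is in the pass changes below):

#include <vector>

enum Op { SI_IF, SI_END_CF, SI_KILL, OTHER };

// Returns the positions where the "skip if all pixels are dead"
// sequence must be emitted: right after a top-level kill, or after
// the SI_END_CF that closes a nest containing one.
std::vector<int> placeSkips(const std::vector<Op> &Ops) {
  std::vector<int> Skips;
  unsigned Depth = 0;
  bool HaveKill = false;
  for (int i = 0, e = (int)Ops.size(); i != e; ++i) {
    switch (Ops[i]) {
    case SI_IF:
      ++Depth;                // entering structured control flow
      break;
    case SI_END_CF:
      if (--Depth == 0 && HaveKill) {
        Skips.push_back(i);   // guard once the outermost nest closes
        HaveKill = false;
      }
      break;
    case SI_KILL:
      if (Depth == 0)
        Skips.push_back(i);   // top-level kill: guard immediately
      else
        HaveKill = true;      // inside a nest: defer to SI_END_CF
      break;
    case OTHER:
      break;
    }
  }
  return Skips;
}

With no SI_KILL in the input, placeSkips returns nothing, which is the point of the change: no kill, no exit-block patching.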
Patch by: Christian König Tested-by: Michel Dänzer Reviewed-by: Tom Stellard Signed-off-by: Christian König git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172845 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 14 ---- lib/Target/R600/SIISelLowering.h | 2 - lib/Target/R600/SIInstructions.td | 24 +++---- lib/Target/R600/SILowerControlFlow.cpp | 127 ++++++++++++++++++++++----------- 4 files changed, 96 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4c672ca..18fa908 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::SI_INTERP_CONST: LowerSI_INTERP_CONST(MI, *BB, I, MRI); break; - case AMDGPU::SI_KIL: - LowerSI_KIL(MI, *BB, I, MRI); - break; case AMDGPU::SI_WQM: LowerSI_WQM(MI, *BB, I, MRI); break; @@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, MI->eraseFromParent(); } -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - // Clear this pixel from the exec mask if the operand is negative - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), - AMDGPU::VCC) - .addReg(AMDGPU::SREG_LIT_0) - .addOperand(MI->getOperand(0)); - - MI->eraseFromParent(); -} - void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index c088112..db36eef 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering { MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; - void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 005be96..cac42da 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI < imm:$attr, SReg_32:$params))] >; -def SI_KIL : InstSI < - (outs), - (ins VReg_32:$src), - "SI_KIL $src", - [(int_AMDGPU_kill VReg_32:$src)] ->; - def SI_WQM : InstSI < (outs), (ins), @@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI < [(int_SI_end_cf SReg_64:$saved)] >; +def SI_KILL : InstSI < + (outs), + (ins VReg_32:$src), + "SI_KIL $src", + [(int_AMDGPU_kill VReg_32:$src)] +>; + } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] } // end IsCodeGenOnly, isPseudo +def : Pat < + (int_AMDGPU_kilp), + (SI_KILL (V_MOV_IMM_I32 0xbf800000)) +>; + /* int_SI_vs_load_input */ def : Pat< (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, @@ -1315,11 +1320,6 @@ def : Pat< >; def : Pat < - (int_AMDGPU_kilp), - (SI_KIL (V_MOV_IMM_I32 0xbf800000)) ->; - -def : Pat < (int_AMDGPU_cube VReg_128:$src), 
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 3fbe653..3780e40 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -68,7 +68,10 @@ private: static char ID; const TargetInstrInfo *TII; - void Skip(MachineInstr &MI, MachineOperand &To); + bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); + + void Skip(MachineInstr &From, MachineOperand &To); + void SkipIfDead(MachineInstr &MI); void If(MachineInstr &MI); void Else(MachineInstr &MI); @@ -78,6 +81,7 @@ private: void Loop(MachineInstr &MI); void EndCf(MachineInstr &MI); + void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); public: @@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { return new SILowerControlFlowPass(tm); } -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, + MachineBasicBlock *To) { + unsigned NumInstr = 0; - for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); - NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty(); + for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); MBB = *MBB->succ_begin()) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); NumInstr < SkipThreshold && I != E; ++I) { if (I->isBundle() || !I->isBundled()) - ++NumInstr; + if (++NumInstr >= SkipThreshold) + return true; } } - if (NumInstr < SkipThreshold) + return false; +} + +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { + + if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) return; DebugLoc DL = From.getDebugLoc(); @@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { .addReg(AMDGPU::EXEC); } +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + if (!shouldSkip(&MBB, &MBB.getParent()->back())) + return; + + MachineBasicBlock::iterator Insert = &MI; + ++Insert; + + // If the exec mask is non-zero, skip the next two instructions + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(3) + .addReg(AMDGPU::EXEC); + + // Exec mask is zero: Export to NULL target... + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) + .addImm(0) + .addImm(0x09) // V_008DFC_SQ_EXP_NULL + .addImm(0) + .addImm(1) + .addImm(1) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0); + + // ... 
and terminate wavefront + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); +} + void SILowerControlFlowPass::If(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); @@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { assert(0); } +void SILowerControlFlowPass::Kill(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + // Kill is only allowed in pixel shaders + MachineFunction &MF = *MBB.getParent(); + SIMachineFunctionInfo *Info = MF.getInfo(); + assert(Info->ShaderType == ShaderType::PIXEL); + + // Clear this pixel from the exec mask if the operand is negative + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + .addReg(AMDGPU::SREG_LIT_0) + .addOperand(MI.getOperand(0)); + + MI.eraseFromParent(); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { - bool HaveCf = false; + + bool HaveKill = false; + unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: + ++Depth; If(MI); break; @@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { break; case AMDGPU::SI_LOOP: + ++Depth; Loop(MI); break; case AMDGPU::SI_END_CF: - HaveCf = true; + if (--Depth == 0 && HaveKill) { + SkipIfDead(MI); + HaveKill = false; + } EndCf(MI); break; + case AMDGPU::SI_KILL: + if (Depth == 0) + SkipIfDead(MI); + else + HaveKill = true; + Kill(MI); + break; + case AMDGPU::S_BRANCH: Branch(MI); break; @@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } } - // TODO: What is this good for? - unsigned ShaderType = MF.getInfo()->ShaderType; - if (HaveCf && ShaderType == ShaderType::PIXEL) { - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; - if (MBB.succ_empty()) { - - MachineInstr &MI = *MBB.getFirstNonPHI(); - DebugLoc DL = MI.getDebugLoc(); - - // If the exec mask is non-zero, skip the next two instructions - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); - - // Exec mask is zero: Export to NULL target... - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0); - - // ... and terminate wavefront - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); - } - } - } - return true; } -- cgit v1.1 From 82d3d4524f2595b2dce617e963b6d67876b4f9ba Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 18 Jan 2013 21:15:53 +0000 Subject: R600: Properly insert S_WAITCNT instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some instructions like memory reads/writes are executed asynchronously, so we need to insert S_WAITCNT instructions to block before accessing their results. Previously we just inserted an S_WAITCNT instruction after each async instruction; this patch fixes that and adds a proper insertion pass. 
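The bookkeeping this needs can be pictured with a toy model (assumed names, a single VM-style counter, and no per-register state; the real pass below tracks VM/EXP/LGKM counts per register):

// Toy model of wait-count tracking; illustrative only.
struct WaitModel {
  unsigned LastIssued = 0; // async operations issued so far
  unsigned WaitedOn = 0;   // issue point we have already waited past
  unsigned DefinedAt = 0;  // issue point of the value being tracked

  void issueAsyncLoad() { DefinedAt = ++LastIssued; }

  // True if an S_WAITCNT must be emitted before using the result.
  bool needWaitBeforeUse() {
    if (DefinedAt <= WaitedOn)
      return false;         // the producer has already been waited for
    WaitedOn = DefinedAt;   // wait exactly until the producer lands
    return true;
  }
};

The pass generalizes this per register and per counter, so a single S_WAITCNT with the right field values covers every pending dependency of the instruction about to execute.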
Patch by: Christian König Tested-by: Michel Dänzer Reviewed-by: Tom Stellard Signed-off-by: Christian König git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172846 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUTargetMachine.cpp | 5 + lib/Target/R600/CMakeLists.txt | 1 + lib/Target/R600/SIISelLowering.cpp | 12 -- lib/Target/R600/SIISelLowering.h | 5 - lib/Target/R600/SIInsertWaits.cpp | 353 ++++++++++++++++++++++++++++++++ lib/Target/R600/SIInstrInfo.h | 4 +- lib/Target/R600/SIInstrInfo.td | 30 +-- 8 files changed, 379 insertions(+), 32 deletions(-) create mode 100644 lib/Target/R600/SIInsertWaits.cpp (limited to 'lib') diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 0f5125d..c75ec24 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -30,6 +30,7 @@ FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); +FunctionPass *createSIInsertWaits(TargetMachine &tm); // Passes common to R600 and SI Pass *createAMDGPUStructurizeCFGPass(); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index d09dc2e..26ac928 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -116,6 +116,11 @@ bool AMDGPUPassConfig::addPreRegAlloc() { } bool AMDGPUPassConfig::addPostRegAlloc() { + const AMDGPUSubtarget &ST = TM->getSubtarget(); + + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + addPass(createSIInsertWaits(*TM)); + } return false; } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index ce0b56b..790a4aa 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_target(R600CodeGen R600RegisterInfo.cpp SIAnnotateControlFlow.cpp SIAssignInterpRegs.cpp + SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp SILowerLiteralConstants.cpp diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 18fa908..ef9d17c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -66,11 +66,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); MachineBasicBlock::iterator I = MI; - if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { - AppendS_WAITCNT(MI, *BB, llvm::next(I)); - return BB; - } - switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); @@ -141,13 +136,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( return BB; } -void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const { - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); -} - - void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index db36eef..8528c24 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -23,11 +23,6 @@ namespace llvm { class SITargetLowering : public AMDGPUTargetLowering { const 
SIInstrInfo * TII; - /// Memory reads and writes are syncronized using the S_WAITCNT instruction. - /// This function takes the most conservative approach and inserts an - /// S_WAITCNT instruction after every read and write. - void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const; void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, unsigned Opocde) const; void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp new file mode 100644 index 0000000..24fc929 --- /dev/null +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -0,0 +1,353 @@ +//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Insert wait instructions for memory reads and writes. +/// +/// Memory reads and writes are issued asynchronously, so we need to insert +/// S_WAITCNT instructions when we want to access any of their results or +/// overwrite any register that's used asynchronously. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +/// \brief One variable for each of the hardware counters +typedef union { + struct { + unsigned VM; + unsigned EXP; + unsigned LGKM; + } Named; + unsigned Array[3]; + +} Counters; + +typedef Counters RegCounters[512]; +typedef std::pair RegInterval; + +class SIInsertWaits : public MachineFunctionPass { + +private: + static char ID; + const SIInstrInfo *TII; + const SIRegisterInfo &TRI; + const MachineRegisterInfo *MRI; + + /// \brief Constant hardware limits + static const Counters WaitCounts; + + /// \brief Constant zero value + static const Counters ZeroCounts; + + /// \brief Counter values we have already waited on. + Counters WaitedOn; + + /// \brief Counter values for last instruction issued. + Counters LastIssued; + + /// \brief Registers used by async instructions. + RegCounters UsedRegs; + + /// \brief Registers defined by async instructions. + RegCounters DefinedRegs; + + /// \brief Different export instruction types seen since last wait. + unsigned ExpInstrTypesSeen; + + /// \brief Get increment/decrement amount for this instruction. + Counters getHwCounts(MachineInstr &MI); + + /// \brief Is operand relevant for async execution? + bool isOpRelevant(MachineOperand &Op); + + /// \brief Get register interval an operand affects. 
+ RegInterval getRegInterval(MachineOperand &Op); + + /// \brief Handle instructions async components + void pushInstruction(MachineInstr &MI); + + /// \brief Insert the actual wait instruction + bool insertWait(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const Counters &Counts); + + /// \brief Resolve all operand dependencies to counter requirements + Counters handleOperands(MachineInstr &MI); + +public: + SIInsertWaits(TargetMachine &tm) : + MachineFunctionPass(ID), + TII(static_cast(tm.getInstrInfo())), + TRI(TII->getRegisterInfo()) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "SI insert wait instructions"; + } + +}; + +} // End anonymous namespace + +char SIInsertWaits::ID = 0; + +const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; +const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; + +FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { + return new SIInsertWaits(tm); +} + +Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { + + uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; + Counters Result; + + Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); + + // Only consider stores or EXP for EXP_CNT + Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && + (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore())); + + // LGKM may uses larger values + if (TSFlags & SIInstrFlags::LGKM_CNT) { + + MachineOperand &Op = MI.getOperand(0); + assert(Op.isReg() && "First LGKM operand must be a register!"); + + unsigned Reg = Op.getReg(); + unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + + } else { + Result.Named.LGKM = 0; + } + + return Result; +} + +bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { + + // Constants are always irrelevant + if (!Op.isReg()) + return false; + + // Defines are always relevant + if (Op.isDef()) + return true; + + // For exports all registers are relevant + MachineInstr &MI = *Op.getParent(); + if (MI.getOpcode() == AMDGPU::EXP) + return true; + + // For stores the stored value is also relevant + if (!MI.getDesc().mayStore()) + return false; + + for (MachineInstr::mop_iterator I = MI.operands_begin(), + E = MI.operands_end(); I != E; ++I) { + + if (I->isReg() && I->isUse()) + return Op.isIdenticalTo(*I); + } + + return false; +} + +RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { + + if (!Op.isReg()) + return std::make_pair(0, 0); + + unsigned Reg = Op.getReg(); + unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize(); + + assert(Size >= 4); + + RegInterval Result; + Result.first = TRI.getEncodingValue(Reg); + Result.second = Result.first + Size / 4; + + return Result; +} + +void SIInsertWaits::pushInstruction(MachineInstr &MI) { + + // Get the hardware counter increments and sum them up + Counters Increment = getHwCounts(MI); + unsigned Sum = 0; + + for (unsigned i = 0; i < 3; ++i) { + LastIssued.Array[i] += Increment.Array[i]; + Sum += Increment.Array[i]; + } + + // If we don't increase anything then that's it + if (Sum == 0) + return; + + // Remember which export instructions we have seen + if (Increment.Named.EXP) { + ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 
1 : 2; + } + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + + MachineOperand &Op = MI.getOperand(i); + if (!isOpRelevant(Op)) + continue; + + RegInterval Interval = getRegInterval(Op); + for (unsigned j = Interval.first; j < Interval.second; ++j) { + + // Remember which registers we define + if (Op.isDef()) + DefinedRegs[j] = LastIssued; + + // and which one we are using + if (Op.isUse()) + UsedRegs[j] = LastIssued; + } + } +} + +bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const Counters &Required) { + + // End of program? No need to wait on anything + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + return false; + + // Figure out if the async instructions execute in order + bool Ordered[3]; + + // VM_CNT is always ordered + Ordered[0] = true; + + // EXP_CNT is unordered if we have both EXP & VM-writes + Ordered[1] = ExpInstrTypesSeen == 3; + + // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS + Ordered[2] = false; + + // The values we are going to put into the S_WAITCNT instruction + Counters Counts = WaitCounts; + + // Do we really need to wait? + bool NeedWait = false; + + for (unsigned i = 0; i < 3; ++i) { + + if (Required.Array[i] <= WaitedOn.Array[i]) + continue; + + NeedWait = true; + + if (Ordered[i]) { + unsigned Value = LastIssued.Array[i] - Required.Array[i]; + + // adjust the value to the real hardware posibilities + Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); + + } else + Counts.Array[i] = 0; + + // Remember on what we have waited on + WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i]; + } + + if (!NeedWait) + return false; + + // Reset EXP_CNT instruction types + if (Counts.Named.EXP == 0) + ExpInstrTypesSeen = 0; + + // Build the wait instruction + BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm((Counts.Named.VM & 0xF) | + ((Counts.Named.EXP & 0x7) << 4) | + ((Counts.Named.LGKM & 0x7) << 8)); + + return true; +} + +/// \brief helper function for handleOperands +static void increaseCounters(Counters &Dst, const Counters &Src) { + + for (unsigned i = 0; i < 3; ++i) + Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); +} + +Counters SIInsertWaits::handleOperands(MachineInstr &MI) { + + Counters Result = ZeroCounts; + + // For each register affected by this + // instruction increase the result sequence + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + + MachineOperand &Op = MI.getOperand(i); + RegInterval Interval = getRegInterval(Op); + for (unsigned j = Interval.first; j < Interval.second; ++j) { + + if (Op.isDef()) + increaseCounters(Result, UsedRegs[j]); + + if (Op.isUse()) + increaseCounters(Result, DefinedRegs[j]); + } + } + + return Result; +} + +bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { + + bool Changes = false; + + MRI = &MF.getRegInfo(); + + WaitedOn = ZeroCounts; + LastIssued = ZeroCounts; + + memset(&UsedRegs, 0, sizeof(UsedRegs)); + memset(&DefinedRegs, 0, sizeof(DefinedRegs)); + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + + Changes |= insertWait(MBB, I, handleOperands(*I)); + pushInstruction(*I); + } + + // Wait for everything at the end of the MBB + Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued); + } + + return Changes; +} diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 631f6c0..783cd9f 
100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -55,7 +55,9 @@ public: namespace SIInstrFlags { enum Flags { // First 4 bits are the instruction encoding - NEED_WAIT = 1 << 4 + VM_CNT = 1 << 4, + EXP_CNT = 1 << 5, + LGKM_CNT = 1 << 6 }; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 873a451..8ff2d6d 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -42,11 +42,14 @@ class InstSI pattern> : AMDGPUInst { field bits<4> EncodingType = 0; - field bits<1> NeedWait = 0; + field bits<1> VM_CNT = 0; + field bits<1> EXP_CNT = 0; + field bits<1> LGKM_CNT = 0; let TSFlags{3-0} = EncodingType; - let TSFlags{4} = NeedWait; - + let TSFlags{4} = VM_CNT; + let TSFlags{5} = EXP_CNT; + let TSFlags{6} = LGKM_CNT; } class Enc32 pattern> : @@ -140,8 +143,7 @@ def EXP : Enc64< let Inst{63-56} = VSRC3; let EncodingType = 0; //SIInstrEncodingType::EXP - let NeedWait = 1; - let usesCustomInserter = 1; + let EXP_CNT = 1; } class MIMG op, dag outs, dag ins, string asm, list pattern> : @@ -174,11 +176,10 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : let Inst{47-40} = VDATA; let Inst{52-48} = SRSRC; let Inst{57-53} = SSAMP; - let EncodingType = 2; //SIInstrEncodingType::MIMG - let NeedWait = 1; - let usesCustomInserter = 1; + let VM_CNT = 1; + let EXP_CNT = 1; } class MTBUF op, dag outs, dag ins, string asm, list pattern> : @@ -215,8 +216,9 @@ class MTBUF op, dag outs, dag ins, string asm, list pattern> : let Inst{63-56} = SOFFSET; let EncodingType = 3; //SIInstrEncodingType::MTBUF - let NeedWait = 1; - let usesCustomInserter = 1; + let VM_CNT = 1; + let EXP_CNT = 1; + let neverHasSideEffects = 1; } @@ -252,8 +254,9 @@ class MUBUF op, dag outs, dag ins, string asm, list pattern> : let Inst{63-56} = SOFFSET; let EncodingType = 4; //SIInstrEncodingType::MUBUF - let NeedWait = 1; - let usesCustomInserter = 1; + let VM_CNT = 1; + let EXP_CNT = 1; + let neverHasSideEffects = 1; } @@ -276,8 +279,7 @@ class SMRD op, dag outs, dag ins, string asm, list pattern> : let Inst{31-27} = 0x18; //encoding let EncodingType = 5; //SIInstrEncodingType::SMRD - let NeedWait = 1; - let usesCustomInserter = 1; + let LGKM_CNT = 1; } class SOP1 op, dag outs, dag ins, string asm, list pattern> : -- cgit v1.1 From c91cbb9b0c90a480299cc7deaef166d47a61d9df Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Fri, 18 Jan 2013 21:20:38 +0000 Subject: This is a resubmittal. For some reason it broke the bots yesterday but I cannot reproduce the problem and have scrubbed my sources and even tested with llvm-lit -v --vg. Support for Mips register information sections. Mips ELF object files have a section that is dedicated to register use info. Some of this information, such as the assumed Global Pointer value, is used by the linker in relocation resolution. The register info section is .reginfo in o32 and .MIPS.options in n64 and n32 ABI files. This patch contains the changes needed to create the sections, but leaves the actual register accounting for a future patch. 
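For reference, the o32 record being emitted has the following shape; this struct is a sketch reconstructed from the layout comment in the new MipsReginfo.cpp below (the field names follow the MIPS ABI supplement, and the struct itself is not part of the patch):

#include <cstdint>

// o32 .reginfo payload: six 4-byte words, 24 bytes in total,
// matching the six EmitIntValue(..., 4) calls in the new pass.
struct Elf32_RegInfo {
  uint32_t ri_gprmask;    // mask of general registers used
  uint32_t ri_cprmask[4]; // masks of coprocessor registers used
  int32_t  ri_gp_value;   // assumed $gp value, read by the linker
};

All fields are emitted as zero for now; filling in the real masks and gp value is the register accounting left to a future patch, as noted above. The n64/n32 path wraps a similar payload in an ODK_REGINFO descriptor inside .MIPS.options.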
Contributor: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172847 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 2 + lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 1 + lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp | 80 ++++++++++++++++++++++++++++ lib/Target/Mips/MCTargetDesc/MipsReginfo.h | 31 +++++++++++ lib/Target/Mips/MipsAsmPrinter.cpp | 10 +++- lib/Target/Mips/MipsAsmPrinter.h | 1 + lib/Target/Mips/MipsSubtarget.h | 7 +++ lib/Target/Mips/MipsTargetObjectFile.cpp | 14 +++++ lib/Target/Mips/MipsTargetObjectFile.h | 2 + 9 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsReginfo.h (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 0072446..3b12d3a 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1319,6 +1319,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, case ELF::SHT_FINI_ARRAY: case ELF::SHT_PREINIT_ARRAY: case ELF::SHT_X86_64_UNWIND: + case ELF::SHT_MIPS_REGINFO: + case ELF::SHT_MIPS_OPTIONS: // Nothing to do. break; diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index be5d7e4..f5b0cca 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMMipsDesc MipsMCCodeEmitter.cpp MipsMCTargetDesc.cpp MipsELFObjectWriter.cpp + MipsReginfo.cpp ) add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp new file mode 100644 index 0000000..1dc9bcb --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp @@ -0,0 +1,80 @@ +//===-- MipsReginfo.cpp - Registerinfo handling --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// .reginfo +// Elf32_Word ri_gprmask +// Elf32_Word ri_cprmask[4] +// Elf32_Word ri_gp_value +// +// .MIPS.options - N64 +// Elf64_Byte kind (ODK_REGINFO) +// Elf64_Byte size (40 bytes) +// Elf64_Section section (0) +// Elf64_Word info (unused) +// Elf64_Word ri_gprmask () +// Elf64_Word ri_pad () +// Elf64_Word[4] ri_cprmask () +// Elf64_Addr ri_gp_value () +// +// .MIPS.options - N32 +// Elf32_Byte kind (ODK_REGINFO) +// Elf32_Byte size (36 bytes) +// Elf32_Section section (0) +// Elf32_Word info (unused) +// Elf32_Word ri_gprmask () +// Elf32_Word ri_pad () +// Elf32_Word[4] ri_cprmask () +// Elf32_Addr ri_gp_value () +// +//===----------------------------------------------------------------------===// +#include "MCTargetDesc/MipsReginfo.h" +#include "MipsSubtarget.h" +#include "MipsTargetObjectFile.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// Integrated assembler version +void +MipsReginfo::emitMipsReginfoSectionCG(MCStreamer &OS, + const TargetLoweringObjectFile &TLOF, + const MipsSubtarget &MST) const +{ + + if (OS.hasRawTextSupport()) + return; + + const MipsTargetObjectFile &TLOFELF = + static_cast(TLOF); + OS.SwitchSection(TLOFELF.getReginfoSection()); + + // .reginfo + if (MST.isABI_O32()) { + OS.EmitIntValue(0, 4); // ri_gprmask + OS.EmitIntValue(0, 4); // ri_cpr[0]mask + OS.EmitIntValue(0, 4); // ri_cpr[1]mask + OS.EmitIntValue(0, 4); // ri_cpr[2]mask + OS.EmitIntValue(0, 4); // ri_cpr[3]mask + OS.EmitIntValue(0, 4); // ri_gp_value + } + // .MIPS.options + else if (MST.isABI_N64()) { + OS.EmitIntValue(1, 1); // kind + OS.EmitIntValue(40, 1); // size + OS.EmitIntValue(0, 2); // section + OS.EmitIntValue(0, 4); // info + OS.EmitIntValue(0, 4); // ri_gprmask + OS.EmitIntValue(0, 4); // pad + OS.EmitIntValue(0, 4); // ri_cpr[0]mask + OS.EmitIntValue(0, 4); // ri_cpr[1]mask + OS.EmitIntValue(0, 4); // ri_cpr[2]mask + OS.EmitIntValue(0, 4); // ri_cpr[3]mask + OS.EmitIntValue(0, 8); // ri_gp_value + } + else llvm_unreachable("Unsupported abi for reginfo"); +} + diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.h b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h new file mode 100644 index 0000000..039b8ea --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h @@ -0,0 +1,31 @@ +//=== MipsReginfo.h - MipsReginfo -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENCE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPSREGINFO_H +#define MIPSREGINFO_H + +namespace llvm { + class MCStreamer; + class TargetLoweringObjectFile; + class MipsSubtarget; + + class MipsReginfo { + void anchor(); + public: + MipsReginfo() {} + + void emitMipsReginfoSectionCG(MCStreamer &OS, + const TargetLoweringObjectFile &TLOF, + const MipsSubtarget &MST) const; + }; + +} // namespace llvm + +#endif + diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 6ad7e96..e3c3429 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-asm-printer" -#include "MipsAsmPrinter.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" +#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "llvm/ADT/SmallString.h" @@ -540,6 +540,14 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // return to previous section if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t.previous")); + +} + +void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { + + // Emit Mips ELF register info + Subtarget->getMReginfo().emitMipsReginfoSectionCG( + OutStreamer, getObjFileLowering(), *Subtarget); } MachineLocation diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index d8fbeeb..dbdaf26 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -80,6 +80,7 @@ public: void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); void EmitStartOfAsmFile(Module &M); + void EmitEndOfAsmFile(Module &M); virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 6a20815..001d8d1 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -14,6 +14,7 @@ #ifndef MIPSSUBTARGET_H #define MIPSSUBTARGET_H +#include "MCTargetDesc/MipsReginfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -96,6 +97,9 @@ protected: InstrItineraryData InstrItins; + // The instance to the register info section object + MipsReginfo MRI; + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -145,6 +149,9 @@ public: bool hasSwap() const { return HasSwap; } bool hasBitCount() const { return HasBitCount; } bool hasFPIdx() const { return HasFPIdx; } + + // Grab MipsRegInfo object + const MipsReginfo &getMReginfo() const { return MRI; } }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 9aea764..4c748c5 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -38,6 +38,20 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); + // Register info information + const MipsSubtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.isABI_N64() || Subtarget.isABI_N32()) + ReginfoSection = + getContext().getELFSection(".MIPS.options", + ELF::SHT_MIPS_OPTIONS, + ELF::SHF_ALLOC |ELF::SHF_MIPS_NOSTRIP, + 
SectionKind::getMetadata()); + else + ReginfoSection = + getContext().getELFSection(".reginfo", + ELF::SHT_MIPS_REGINFO, + ELF::SHF_ALLOC, + SectionKind::getMetadata()); } // A address must be loaded from a small section if its size is less than the diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index c394a9d..c0e9140 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -17,6 +17,7 @@ namespace llvm { class MipsTargetObjectFile : public TargetLoweringObjectFileELF { const MCSection *SmallDataSection; const MCSection *SmallBSSSection; + const MCSection *ReginfoSection; public: void Initialize(MCContext &Ctx, const TargetMachine &TM); @@ -35,6 +36,7 @@ namespace llvm { const TargetMachine &TM) const; // TODO: Classify globals as mips wishes. + const MCSection *getReginfoSection() const { return ReginfoSection; } }; } // end namespace llvm -- cgit v1.1 From 8a53a8329f47f86757af17dbea2864aa95414615 Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Fri, 18 Jan 2013 21:45:30 +0000 Subject: Add llvm::hexDigitValue to convert single characters to hex. This is duplicated in a couple places in the codebase. Adopt this in APFloat. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/APFloat.cpp | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 4a7a5d1..2ac86a2 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -101,26 +102,6 @@ decDigitValue(unsigned int c) return c - '0'; } -static unsigned int -hexDigitValue(unsigned int c) -{ - unsigned int r; - - r = c - '0'; - if (r <= 9) - return r; - - r = c - 'A'; - if (r <= 5) - return r + 10; - - r = c - 'a'; - if (r <= 5) - return r + 10; - - return -1U; -} - /* Return the value of a decimal exponent of the form [+-]ddddddd. -- cgit v1.1 From 1a17bd21ff64968817dedb6cd2b7c809cee90804 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 18 Jan 2013 21:50:24 +0000 Subject: Remove unused parameter. Also use the AttributeSet query methods instead of the Attribute query methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/Analysis.cpp | 15 ++++++++------- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index aaba144..c7abf7a 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -265,8 +265,7 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { /// between it and the return. /// /// This function only tests target-independent requirements. 
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr, - const TargetLowering &TLI) { +bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){ const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -312,14 +311,16 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); - Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); - if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias)) + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias)) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerRetAttr.hasAttribute(Attribute::ZExt) || - CallerRetAttr.hasAttribute(Attribute::SExt)) + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Otherwise, make sure the unmodified return value of I is the return value. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4e07fd3..cf74860 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5277,8 +5277,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && - !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) + if (isTailCall && !isInTailCallPosition(CS, TLI)) isTailCall = false; TargetLowering:: -- cgit v1.1 From 1b0c54f1c5dd61e56cb7cbc435fcb3319cff628f Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 18 Jan 2013 21:53:16 +0000 Subject: Use AttributeSet accessor methods instead of Attribute accessor methods. Further encapsulation of the Attribute object. Don't allow direct access to the Attribute object as an aggregate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 8 ++++---- lib/IR/AsmWriter.cpp | 13 ++++++------- lib/IR/Attributes.cpp | 5 +++++ lib/Transforms/IPO/ArgumentPromotion.cpp | 16 ++++++---------- lib/Transforms/InstCombine/InstCombineCalls.cpp | 6 +++--- lib/Transforms/Scalar/CodeGenPrepare.cpp | 12 ++++++------ lib/Transforms/Utils/CloneFunction.cpp | 6 ++---- 7 files changed, 32 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 76ece7f..86949a7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -50,14 +50,14 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. 
- Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); - if (AttrBuilder(CallerRetAttr) + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex) .removeAttribute(Attribute::NoAlias).hasAttributes()) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerRetAttr.hasAttribute(Attribute::ZExt) || - CallerRetAttr.hasAttribute(Attribute::SExt)) + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Check if the only use is a function return node. diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 1c46a94..ff43128 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1557,9 +1557,8 @@ void AssemblyWriter::printFunction(const Function *F) { FunctionType *FT = F->getFunctionType(); const AttributeSet &Attrs = F->getAttributes(); - Attribute RetAttrs = Attrs.getRetAttributes(); - if (RetAttrs.hasAttributes()) - Out << Attrs.getRetAttributes().getAsString() << ' '; + if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) + Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' '; TypePrinter.print(F->getReturnType(), Out); Out << ' '; WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent()); @@ -1849,8 +1848,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Type *RetTy = FTy->getReturnType(); const AttributeSet &PAL = CI->getAttributes(); - if (PAL.getRetAttributes().hasAttributes()) - Out << ' ' << PAL.getRetAttributes().getAsString(); + if (PAL.hasAttributes(AttributeSet::ReturnIndex)) + Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex); // If possible, print out the short form of the call instruction. We can // only do this if the first argument is a pointer to a nonvararg function, @@ -1888,8 +1887,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) { PrintCallingConv(II->getCallingConv(), Out); } - if (PAL.getRetAttributes().hasAttributes()) - Out << ' ' << PAL.getRetAttributes().getAsString(); + if (PAL.hasAttributes(AttributeSet::ReturnIndex)) + Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex); // If possible, print out the short form of the invoke instruction. We can // only do this if the first argument is a pointer to a nonvararg function, diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 173782e..bb98358 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -660,6 +660,11 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { return false; } +AttributeSet AttributeSet::addRetAttributes(LLVMContext &C, + AttributeSet Attrs) const { + return addAttr(C, ReturnIndex, getAttributes(ReturnIndex)); +} + AttributeSet AttributeSet::addFnAttributes(LLVMContext &C, AttributeSet Attrs) const { return addAttr(C, FunctionIndex, getAttributes(FunctionIndex)); diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 15a479e..75c0504 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -518,10 +518,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, const AttributeSet &PAL = F->getAttributes(); // Add any return attributes. 
- Attribute attrs = PAL.getRetAttributes(); - if (attrs.hasAttributes()) + if (PAL.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, - attrs)); + PAL.getRetAttributes())); // First, determine the new argument list unsigned ArgIndex = 1; @@ -591,10 +590,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Add any function attributes. - attrs = PAL.getFnAttributes(); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, - attrs)); + PAL.getFnAttributes())); Type *RetTy = FTy->getReturnType(); @@ -639,10 +637,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, const AttributeSet &CallPAL = CS.getAttributes(); // Add any return attributes. - Attribute attrs = CallPAL.getRetAttributes(); - if (attrs.hasAttributes()) + if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, - attrs)); + CallPAL.getRetAttributes())); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. @@ -721,10 +718,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Add any function attributes. - attrs = CallPAL.getFnAttributes(); if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, - attrs)); + CallPAL.getFnAttributes())); Instruction *New; if (InvokeInst *II = dyn_cast(Call)) { diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 63e452b..1ac9a9d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1014,7 +1014,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this return value. if (!CallerPAL.isEmpty() && !Caller->use_empty()) { - AttrBuilder RAttrs = CallerPAL.getRetAttributes(); + AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); if (RAttrs.hasAttributes(Attribute::typeIncompatible(NewRetTy))) return false; // Attribute not compatible with transformed value. } @@ -1117,7 +1117,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { attrVec.reserve(NumCommonArgs); // Get any return attributes. - AttrBuilder RAttrs = CallerPAL.getRetAttributes(); + AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. @@ -1287,7 +1287,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add any result attributes. Attribute Attr = Attrs.getRetAttributes(); - if (Attr.hasAttributes()) + if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, Attr)); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index d513c96..d71dd5d 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -729,9 +729,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). 
const Function *F = BB->getParent(); - Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); - if (CallerRetAttr.hasAttribute(Attribute::ZExt) || - CallerRetAttr.hasAttribute(Attribute::SExt)) + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the @@ -788,10 +788,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. - Attribute CalleeRetAttr = CS.getAttributes().getRetAttributes(); - if (AttrBuilder(CalleeRetAttr). + AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerRetAttr). + AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). removeAttribute(Attribute::NoAlias)) continue; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index ad753ab..1ba332b 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -98,10 +98,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldFunc->getAttributes() - .getRetAttributes())); + .addRetAttributes(NewFunc->getContext(), + OldFunc->getAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() .addFnAttributes(NewFunc->getContext(), OldFunc->getAttributes())); -- cgit v1.1 From aec710675febb13d3cf4a7141d2658a3b1797cfd Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 18 Jan 2013 21:56:07 +0000 Subject: If the attributes don't exist, just bail for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172854 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index bb98358..5de1827 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -253,7 +253,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) break; } - assert(AWI && "Cannot find index in attribute set!"); + if (!AWI) return; uint64_t Mask = AWI->Attrs.Raw(); -- cgit v1.1 From 0969ddf601c7cf9da2b01fe227005951c6af0cac Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 18 Jan 2013 22:11:33 +0000 Subject: Split out DW_OP_addr for the split debug info DWARF5 proposal. 
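The new addOpAddress helper below selects between the two encodings: without split DWARF it emits the classic pair, DW_OP_addr followed by the relocated symbol address, while under split DWARF it emits DW_OP_GNU_addr_index followed by an index into the address pool (obtained via getAddrPoolIndex), so the .dwo side needs no address relocations. The relocation rationale is the standard Fission motivation rather than anything stated in the patch itself.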
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172857 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 24 ++++++++++++++++++------ lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5 +++++ 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index e98b118..255e083 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -190,6 +190,22 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, } } +/// addOpAddress - Add a dwarf op address data and value using the +/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { + + if (!DD->useSplitDwarf()) { + addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + } else { + unsigned idx = DU->getAddrPoolIndex(Sym); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + /// addDelta - Add a label delta attribute data and value. /// void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, @@ -1297,9 +1313,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(GV.getGlobal())); + addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && @@ -1329,9 +1343,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast(Ptr))); + addOpAddress(Block, Asm->Mang->getSymbol(cast(Ptr))); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); SmallVector Idx(CE->op_begin()+1, CE->op_end()); addUInt(Block, 0, dwarf::DW_FORM_udata, diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index e576ff2..1091bce 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -215,6 +215,11 @@ public: /// void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + /// addOpAddress - Add a dwarf op address data and value using the + /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// + void addOpAddress(DIE *Die, MCSymbol *Label); + /// addDelta - Add a label delta attribute data and value. /// void addDelta(DIE *Die, unsigned Attribute, unsigned Form, -- cgit v1.1 From 8a8cef7156b0a3b7a1a5bcf646f049f51cfa5beb Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 18 Jan 2013 23:00:33 +0000 Subject: Fixed typo in comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172863 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index c3a0fd6..055f30c 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -578,8 +578,8 @@ static Value *GetObjCArg(Value *Inst) { return StripPointerCastsAndObjCCalls(cast(Inst)->getArgOperand(0)); } -/// \brief This is similar to AliasAnalysis's isObjCIdentifiedObject, except -/// that it uses special knowledge of ObjC conventions. +/// \brief This is similar to AliasAnalysis's isIdentifiedObject, except that it +/// uses special knowledge of ObjC conventions. static bool IsObjCIdentifiedObject(const Value *V) { // Assume that call results and arguments have their own "provenance". // Constants (including GlobalVariables) and Allocas are never -- cgit v1.1 From ffd5f9ad601ec34013318e58f37127a5679347a3 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 18 Jan 2013 23:02:45 +0000 Subject: Improved comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172864 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 055f30c..a63e0e0 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -578,8 +578,11 @@ static Value *GetObjCArg(Value *Inst) { return StripPointerCastsAndObjCCalls(cast(Inst)->getArgOperand(0)); } -/// \brief This is similar to AliasAnalysis's isIdentifiedObject, except that it -/// uses special knowledge of ObjC conventions. +/// \brief Return true if this value refers to a distinct and identifiable +/// object. +/// +/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses +/// special knowledge of ObjC conventions. static bool IsObjCIdentifiedObject(const Value *V) { // Assume that call results and arguments have their own "provenance". // Constants (including GlobalVariables) and Allocas are never -- cgit v1.1 From 7336f7febb5170b374a4cbffee273ad82ff8a1a3 Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Fri, 18 Jan 2013 23:05:41 +0000 Subject: Use hexDigitValue in LLLexer. 
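A minimal usage sketch, assuming the llvm/ADT/StringExtras.h declaration and the same semantics as the static copy removed from APFloat.cpp earlier in this series ('0'-'9', 'a'-'f', and 'A'-'F' map to 0-15; anything else yields -1U):

    #include "llvm/ADT/StringExtras.h"
    #include <cassert>

    int main() {
      unsigned V = llvm::hexDigitValue('f'); // decodes to 15
      assert(V == 15 && "hex digit decoded");
      assert(llvm::hexDigitValue('g') == -1U && "non-digit yields -1U");
      return 0;
    }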
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172866 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 32e709c..925f9bc 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LLLexer.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Assembly/Parser.h" #include "llvm/IR/DerivedTypes.h" @@ -55,22 +56,12 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { return Result; } -static char parseHexChar(char C) { - if (C >= '0' && C <= '9') - return C-'0'; - if (C >= 'A' && C <= 'F') - return C-'A'+10; - if (C >= 'a' && C <= 'f') - return C-'a'+10; - return 0; -} - uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; ++Buffer) { uint64_t OldRes = Result; Result *= 16; - Result += parseHexChar(*Buffer); + Result += hexDigitValue(*Buffer); if (Result < OldRes) { // Uh, oh, overflow detected!!! Error("constant bigger than 64 bits detected!"); @@ -86,12 +77,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<16; i++, Buffer++) { assert(Buffer != End); Pair[0] *= 16; - Pair[0] += parseHexChar(*Buffer); + Pair[0] += hexDigitValue(*Buffer); } Pair[1] = 0; for (int i=0; i<16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; - Pair[1] += parseHexChar(*Buffer); + Pair[1] += hexDigitValue(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -105,12 +96,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<4 && Buffer != End; i++, Buffer++) { assert(Buffer != End); Pair[1] *= 16; - Pair[1] += parseHexChar(*Buffer); + Pair[1] += hexDigitValue(*Buffer); } Pair[0] = 0; for (int i=0; i<16; i++, Buffer++) { Pair[0] *= 16; - Pair[0] += parseHexChar(*Buffer); + Pair[0] += hexDigitValue(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -129,7 +120,7 @@ static void UnEscapeLexed(std::string &Str) { *BOut++ = '\\'; // Two \ becomes one BIn += 2; } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { - *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]); + *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]); BIn += 3; // Skip over handled chars ++BOut; } else { -- cgit v1.1 From 48177ac90fb940833b9deea1a6716092348cfe82 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 18 Jan 2013 23:10:30 +0000 Subject: On Sandybridge loading unaligned 256bits using two XMM loads (vmovups and vinsertf128) is faster than using a single vmovups instruction. 
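In other words, for an unaligned 32-byte load the backend now prefers a 16-byte vmovups into an xmm register followed by a vinsertf128 that loads the upper 16 bytes into the high half of the ymm register, rather than one 32-byte vmovups. (The combine itself operates on the DAG, as the PerformLOADCombine change below shows; the instruction pairing here is illustrative.)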
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172868 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2b6ff36..73a1d2e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16340,8 +16340,39 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, EVT MemVT = Ld->getMemoryVT(); DebugLoc dl = Ld->getDebugLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned RegSz = RegVT.getSizeInBits(); ISD::LoadExtType Ext = Ld->getExtensionType(); + unsigned Alignment = Ld->getAlignment(); + + // On Sandybridge unaligned 256bit loads are inefficient. + if (RegVT.is256BitVector() && !Subtarget->hasInt256() && + !DCI.isBeforeLegalizeOps() && Alignment < 32 && + Ext == ISD::NON_EXTLOAD) { + unsigned NumElems = RegVT.getVectorNumElements(); + SDValue Ptr = Ld->getBasePtr(); + SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + NumElems/2); + SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->isInvariant(), + Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->isInvariant(), + Alignment); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Load1.getValue(1), + Load2.getValue(1)); + + SDValue NewVec = DAG.getUNDEF(RegVT); + NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl); + NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl); + return DCI.CombineTo(N, NewVec, TF, true); + } // If this is a vector EXT Load then attempt to optimize it using a // shuffle. 
If SSSE3 is not available we may emit an illegal shuffle but the @@ -16356,7 +16387,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, assert(MemVT.isVector() && "Must load a vector from memory"); unsigned NumElems = RegVT.getVectorNumElements(); - unsigned RegSz = RegVT.getSizeInBits(); unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); -- cgit v1.1 From 065db2347f4df7a92f18221bd7288ebcd4c38c35 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Sat, 19 Jan 2013 00:42:16 +0000 Subject: Fix 80-col and early exit in cost model git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172877 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/TargetTransformInfo.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 344be71..4873a7f 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -289,7 +289,9 @@ ImmutablePass *llvm::createNoTargetTransformInfoPass() { //======================================= COST TABLES == -CostTable::CostTable(const CostTableEntry *table, const size_t size, unsigned numTypes) +CostTable::CostTable(const CostTableEntry *table, + const size_t size, + unsigned numTypes) : table(table), size(size), numTypes(numTypes) { assert(table && "missing cost table"); assert(size > 0 && "empty cost table"); @@ -297,22 +299,23 @@ CostTable::CostTable(const CostTableEntry *table, const size_t size, unsigned nu unsigned CostTable::_findCost(int ISD, MVT *Types) const { for (unsigned i = 0; i < size; ++i) { - if (table[i].ISD == ISD) { - bool found = true; - for (unsigned t=0; t Date: Sat, 19 Jan 2013 02:00:40 +0000 Subject: This is a resubmittal. For some reason it broke the bots yesterday but I cannot reproduce the problem and have scrubed my sources and even tested with llvm-lit -v --vg. Formatting fixes. Mostly long lines and blank spaces at end of lines. 
Contributor: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/LLVMBuild.txt | 2 +- lib/Target/Mips/Disassembler/Makefile | 2 +- lib/Target/Mips/Mips16InstrInfo.cpp | 52 +++++++++++++++++++----------- lib/Target/Mips/Mips16InstrInfo.td | 12 +++---- lib/Target/Mips/MipsInstrInfo.td | 12 ++++--- lib/Target/Mips/MipsLongBranch.cpp | 6 ++-- 6 files changed, 52 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt index 048ad0d..7101c06 100644 --- a/lib/Target/Mips/Disassembler/LLVMBuild.txt +++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/Mips/Disassembler/Makefile b/lib/Target/Mips/Disassembler/Makefile index a78feba..7900373 100644 --- a/lib/Target/Mips/Disassembler/Makefile +++ b/lib/Target/Mips/Disassembler/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/Mips/Disassembler/Makefile ---------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 91b5ba0..117faea 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -28,7 +28,8 @@ using namespace llvm; static cl::opt<bool> NeverUseSaveRestore( "mips16-never-use-save-restore", cl::init(false), - cl::desc("For testing ability to adjust stack pointer without save/restore instruction"), + cl::desc("For testing ability to adjust stack pointer " + "without save/restore instruction"), cl::Hidden); @@ -169,15 +170,16 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { } // Adjust SP by FrameSize bytes. Save RA, S0, S1 -void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, +void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (!NeverUseSaveRestore) { if (isUInt<11>(FrameSize)) BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize); else { - int Base = 2040; // should create template function like isUInt that returns largest - // possible n bit unsigned integer + int Base = 2040; // should create template function like isUInt that + // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).

addImm(Base); if (isInt<16>(-Remainder)) @@ -193,13 +195,16 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBloc // sw s1, -8[sp] // sw s0, -12[sp] - MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::RA); + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), + Mips::RA); MIB1.addReg(Mips::SP); MIB1.addImm(-4); - MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S1); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), + Mips::S1); MIB2.addReg(Mips::SP); MIB2.addImm(-8); - MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S0); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), + Mips::S0); MIB3.addReg(Mips::SP); MIB3.addImm(-12); adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1); @@ -207,15 +212,16 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBloc } // Adjust SP by FrameSize bytes. Restore RA, S0, S1 -void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (!NeverUseSaveRestore) { if (isUInt<11>(FrameSize)) BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize); else { - int Base = 2040; // should create template function like isUInt that returns largest - // possible n bit unsigned integer + int Base = 2040; // should create template function like isUInt that + // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; if (isInt<16>(Remainder)) BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Remainder); @@ -229,15 +235,19 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicB // lw ra, -4[sp] // lw s1, -8[sp] // lw s0, -12[sp] - MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::A0); + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), + Mips::A0); MIB1.addReg(Mips::SP); MIB1.addImm(-4); - MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::RA); + MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), + Mips::RA); MIB0.addReg(Mips::A0); - MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S1); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), + Mips::S1); MIB2.addReg(Mips::SP); MIB2.addImm(-8); - MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S0); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), + Mips::S0); MIB3.addReg(Mips::SP); MIB3.addImm(-12); } @@ -245,10 +255,12 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicB } // Adjust SP by Amount bytes where bytes can be up to 32bit number. -// This can only be called at times that we know that there is at least one free register. +// This can only be called at times that we know that there is at least one free +// register. // This is clearly safe at prologue and epilogue. 
// -void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, +void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Reg1, unsigned Reg2) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); @@ -269,11 +281,13 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasi MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1); MIB3.addReg(Reg1); MIB3.addReg(Reg2, RegState::Kill); - MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::SP); + MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), + Mips::SP); MIB4.addReg(Reg1, RegState::Kill); } -void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, +void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { assert(false && "adjust stack pointer amount exceeded"); } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index e8e2f3c..56553d9 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -257,7 +257,7 @@ class FRR16_JALRC_RA_only_ins nd_, bits<1> l_, class FRR16_JALRC_ins nd, bits<1> l, bits<1> ra, string asmstr, InstrItinClass itin>: - FRR16_JALRC ; // @@ -363,7 +363,7 @@ def imm32: Operand; def Constant32: MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>; - + def LwConstant32: MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm), "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>; @@ -430,7 +430,7 @@ def AddiuSpImmX16 : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> { let Defs = [SP]; let Uses = [SP]; -} +} // // Format: ADDU rz, rx, ry MIPS16e @@ -1072,8 +1072,8 @@ class UncondBranch16_pat: // Indirect branch def: Mips16Pat< - (brind CPU16Regs:$rs), - (JrcRx16 CPU16Regs:$rs)>; + (brind CPU16Regs:$rs), + (JrcRx16 CPU16Regs:$rs)>; // Jump and Link (Call) @@ -1562,7 +1562,7 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), // hi/lo relocs -def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), +def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>; // wrapper_pic diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9085a26..74f3178 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -951,10 +951,14 @@ def : InstAlias<"slt $rs, $rt, $imm", def : InstAlias<"xor $rs, $rt, $imm", (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>, Requires<[NotMips64]>; -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0)>; -def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt)>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0)>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt)>; +def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegsOpnd:$rt, + CPURegsOpnd:$rd, 0)>; +def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegsOpnd:$rd, 0, + CPURegsOpnd:$rt)>; +def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, + CPURegsOpnd:$rd, 0)>; +def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, + CPURegsOpnd:$rt)>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions diff --git a/lib/Target/Mips/MipsLongBranch.cpp 
b/lib/Target/Mips/MipsLongBranch.cpp index 30f68b1..2efe534 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -10,10 +10,10 @@ // This pass expands a branch or jump instruction into a long branch if its // offset is too large to fit into its immediate field. // -// FIXME: -// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. +// FIXME: +// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. // 2. If program has inline assembly statements whose size cannot be -// determined accurately, load branch target addresses from the GOT. +// determined accurately, load branch target addresses from the GOT. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-long-branch" -- cgit v1.1 From 90230c84668269fbd53d163e398cd16486d5d414 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 19 Jan 2013 08:03:47 +0000 Subject: Sort all of the includes. Several files got checked in with mis-sorted includes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172891 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2 +- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 +- lib/Transforms/Instrumentation/BlackList.cpp | 1 - lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 762b346..5856a95 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -23,9 +23,9 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 1aad842..8b6d64d 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -16,7 +16,6 @@ #define DEBUG_TYPE "asan" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -43,6 +42,7 @@ #include "llvm/Support/system_error.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp index a3cf84c..927982d 100644 --- a/lib/Transforms/Instrumentation/BlackList.cpp +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BlackList.h" - #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 2d8ca67..618a6f0 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ 
b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -71,7 +71,6 @@ #define DEBUG_TYPE "msan" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -91,6 +90,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 463ca66..f93c5ab 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -22,7 +22,6 @@ #define DEBUG_TYPE "tsan" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -41,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; -- cgit v1.1 From cfcab21e4d0e4d7444b147898d6aed1348df3043 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 19 Jan 2013 08:27:45 +0000 Subject: Use MVT instead of EVT when computing shuffle immediates since they can only be for legal types. Keeps compiler from generating unneeded checks and handling for extended types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172893 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 73a1d2e..b6b10e2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3837,7 +3837,7 @@ static bool isVPERM2X128Mask(ArrayRef Mask, EVT VT, bool HasFp256) { /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle /// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned HalfSize = VT.getVectorNumElements()/2; @@ -4040,7 +4040,7 @@ bool X86::isVINSERTF128Index(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. /// Handles 128-bit and 256-bit. static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for PSHUF/SHUFP"); @@ -4070,7 +4070,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. 
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4094,7 +4094,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4118,7 +4118,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; unsigned NumElts = VT.getVectorNumElements(); @@ -4149,8 +4149,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); - EVT VecVT = N->getOperand(0).getValueType(); - EVT ElVT = VecVT.getVectorElementType(); + MVT VecVT = N->getOperand(0).getValueType().getSimpleVT(); + MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); return Index / NumElemsPerChunk; @@ -4166,8 +4166,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); - EVT VecVT = N->getValueType(0); - EVT ElVT = VecVT.getVectorElementType(); + MVT VecVT = N->getValueType(0).getSimpleVT(); + MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); return Index / NumElemsPerChunk; @@ -4177,7 +4177,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. /// Handles 256-bit. static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); unsigned NumElts = VT.getVectorNumElements(); -- cgit v1.1 From ba9586544164e69754039a25cb0ef7907d27382d Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 19 Jan 2013 08:38:41 +0000 Subject: On Sandybridge split unaligned 256bit stores into two xmm-sized stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172894 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b6b10e2..ca8cd74 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16344,12 +16344,15 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); unsigned Alignment = Ld->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8; // On Sandybridge unaligned 256bit loads are inefficient. 
if (RegVT.is256BitVector() && !Subtarget->hasInt256() && - !DCI.isBeforeLegalizeOps() && Alignment < 32 && - Ext == ISD::NON_EXTLOAD) { + !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { unsigned NumElems = RegVT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + SDValue Ptr = Ld->getBasePtr(); SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); @@ -16363,7 +16366,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), - Alignment); + std::max(Alignment/2U, 1U)); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); @@ -16536,16 +16539,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned Alignment = St->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8; // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. if (VT.is256BitVector() && !Subtarget->hasInt256() && - StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && - StoredVal.getNumOperands() == 2) { - SDValue Value0 = StoredVal.getOperand(0); - SDValue Value1 = StoredVal.getOperand(1); + StVT == VT && !IsAligned) { + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + + SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); + SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); SDValue Ptr0 = St->getBasePtr(); @@ -16553,10 +16561,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), Alignment); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), + std::max(Alignment/2U, 1U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } -- cgit v1.1 From 1af132dcf3c9fd87ac8ad0c103e74ef3f8a0bae6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 19 Jan 2013 13:57:58 +0000 Subject: LoopVectorizer: Emit memory checks into their own basic block. This separates the check for "too few elements to run the vector loop" from the "memory overlap" check, giving a lot nicer code and allowing to skip the memory checks when we're not going to execute the vector code anyways. We still leave the decision of whether to emit the memory checks as branches or setccs, but it seems to be doing a good job. If ugly code pops up we may want to emit them as separate blocks too. Small speedup on MultiSource/Benchmarks/MallocBench/espresso. Most of this is legwork to allow multiple bypass blocks while updating PHIs, dominators and loop info. 
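The emitted shape is roughly: the entry block branches on the iteration-count test ("cmp.zero") straight to the middle block when the vector loop cannot run, and otherwise into the new "vector.memcheck" block, whose "found.conflict" result then picks between the scalar path and the vector preheader. Block and value names here follow the ones created in the patch; the exact branch layout is a sketch of the CFG comment updated below.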
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172902 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 86 ++++++++++++++++++------------ 1 file changed, 53 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index bc8e121..fec1573 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -163,8 +163,8 @@ private: /// Add code that checks at runtime if the accessed arrays overlap. /// Returns the comparator value or NULL if no check is needed. - Value *addRuntimeCheck(LoopVectorizationLegality *Legal, - Instruction *Loc); + Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal, + Instruction *Loc); /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(LoopVectorizationLegality *Legal); /// Copy and widen the instructions from the old loop. @@ -283,8 +283,8 @@ private: BasicBlock *LoopVectorBody; ///The scalar loop body. BasicBlock *LoopScalarBody; - ///The first bypass block. - BasicBlock *LoopBypassBlock; + /// A list of all bypass blocks. The first block is the entry of the loop. + SmallVector LoopBypassBlocks; /// The new Induction variable which was added to the new block. PHINode *Induction; @@ -868,7 +868,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { } } -Value* +Instruction * InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, Instruction *Loc) { LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = @@ -877,7 +877,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, if (!PtrRtCheck->Need) return NULL; - Value *MemoryRuntimeCheck = 0; + Instruction *MemoryRuntimeCheck = 0; unsigned NumPointers = PtrRtCheck->Pointers.size(); SmallVector Starts; SmallVector Ends; @@ -918,8 +918,9 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, Start0, End1, "bound0", Loc); Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, Start1, End0, "bound1", Loc); - Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1, - "found.conflict", Loc); + Instruction *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, + Cmp1, "found.conflict", + Loc); if (MemoryRuntimeCheck) MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or, MemoryRuntimeCheck, @@ -941,7 +942,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass. + [ ] <-- vector loop bypass (may consist of multiple blocks). / | / v | [ ] <-- vector pre header. @@ -1002,10 +1003,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { ConstantInt::get(IdxTy, 0); assert(BypassBlock && "Invalid loop structure"); - - // Generate the code that checks in runtime if arrays overlap. - Value *MemoryRuntimeCheck = addRuntimeCheck(Legal, - BypassBlock->getTerminator()); + LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. BasicBlock *VectorPH = @@ -1062,10 +1060,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { StartIdx, "cmp.zero", Loc); - // If we are using memory runtime checks, include them in. 
- if (MemoryRuntimeCheck) - Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck, - "CntOrMem", Loc); + // Generate the code that checks in runtime if arrays overlap. We put the + // checks into a separate block to make the more common case of few elements + // faster. + if (Instruction *MemoryRuntimeCheck = addRuntimeCheck(Legal, Loc)) { + // Create a new block containing the memory check. + BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemoryRuntimeCheck, + "vector.memcheck"); + LoopBypassBlocks.push_back(CheckBlock); + + // Replace the branch into the memory check block with a conditional branch + // for the "few elements case". + Instruction *OldTerm = BypassBlock->getTerminator(); + BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm); + OldTerm->eraseFromParent(); + + Cmp = MemoryRuntimeCheck; + assert(Loc == CheckBlock->getTerminator()); + } BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); // Remove the old terminator. @@ -1109,30 +1121,33 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Value *CRD = CountRoundDown; if (CRDSize > IISize) CRD = CastInst::Create(Instruction::Trunc, CountRoundDown, - II.StartValue->getType(), - "tr.crd", BypassBlock->getTerminator()); + II.StartValue->getType(), "tr.crd", + LoopBypassBlocks.back()->getTerminator()); else if (CRDSize < IISize) CRD = CastInst::Create(Instruction::SExt, CountRoundDown, II.StartValue->getType(), - "sext.crd", BypassBlock->getTerminator()); + "sext.crd", + LoopBypassBlocks.back()->getTerminator()); // Handle reverse integer induction counter: - EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end", - BypassBlock->getTerminator()); + EndValue = + BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end", + LoopBypassBlocks.back()->getTerminator()); break; } case LoopVectorizationLegality::IK_PtrInduction: { // For pointer induction variables, calculate the offset using // the end index. - EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown, - "ptr.ind.end", - BypassBlock->getTerminator()); + EndValue = + GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end", + LoopBypassBlocks.back()->getTerminator()); break; } }// end of case // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - ResumeVal->addIncoming(II.StartValue, BypassBlock); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); ResumeVal->addIncoming(EndValue, VecBody); // Fix the scalar body counter (PHI node). @@ -1148,7 +1163,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { assert(!ResumeIndex && "Unexpected resume value found"); ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", MiddleBlock->getTerminator()); - ResumeIndex->addIncoming(StartIdx, BypassBlock); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]); ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); } @@ -1188,6 +1204,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Insert the new loop into the loop nest and register the new basic blocks. 
 if (ParentLoop) {
     ParentLoop->addChildLoop(Lp);
+    for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+      ParentLoop->addBasicBlockToLoop(LoopBypassBlocks[I], LI->getBase());
     ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
     ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
     ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
@@ -1204,7 +1222,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   LoopExitBlock = ExitBlock;
   LoopVectorBody = VecBody;
   LoopScalarBody = OldBasicBlock;
-  LoopBypassBlock = BypassBlock;
 }
 
 /// This function returns the identity element (or neutral element) for
@@ -1344,7 +1361,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
     // To do so, we need to generate the 'identity' vector and override
     // one of the elements with the incoming scalar reduction. We need
     // to do it in the vector-loop preheader.
-    Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
+    Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
 
     // This is the vector-clone of the value that leaves the loop.
     VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -1392,7 +1409,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
     VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
     PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
     Value *StartVal = (part == 0) ? VectorStart : Identity;
-    NewPhi->addIncoming(StartVal, LoopBypassBlock);
+    for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+      NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
     NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
     RdxParts.push_back(NewPhi);
   }
@@ -1925,12 +1943,14 @@ void InnerLoopVectorizer::updateAnalysis() {
   SE->forgetLoop(OrigLoop);
 
   // Update the dominator tree information.
-  assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) &&
+  assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
          "Entry does not dominate exit.");
 
-  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock);
+  for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+    DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
+  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
   DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
-  DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock);
+  DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
   DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
   DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
   DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
--
cgit v1.1


From 312c7d9dc126e90ca1f40d30ba9a3ba22ade35c4 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sat, 19 Jan 2013 18:19:39 +0000
Subject: BitstreamReader hasn't aged well. It's been hacked on by various people
 and has passed the point of making sense. Let's tidy things up: first step,
 moving a ton of big functions out of line.
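As background for this and the following cleanups, here is a minimal sketch
of what moving a member function out of line means: an in-class definition
becomes a declaration plus a definition in a .cpp file. Widget and grow are
placeholder names, not BitstreamReader code:

  #include <vector>

  // Header, before: the body sits in the class, so it is implicitly
  // inline and reparsed by every translation unit that includes it.
  class WidgetBefore {
    std::vector<int> Data;
  public:
    void grow(unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        Data.push_back(0);
    }
  };

  // Header, after: only the declaration remains...
  class Widget {
    std::vector<int> Data;
  public:
    void grow(unsigned N); // defined out of line in Widget.cpp
  };

  // ...and the body moves to Widget.cpp, compiled exactly once.
  void Widget::grow(unsigned N) {
    for (unsigned i = 0; i != N; ++i)
      Data.push_back(0);
  }

The trade-off is smaller headers and less duplicated object code at a small
cost in inlining opportunity.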
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172904 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 4 - lib/Bitcode/Reader/BitstreamReader.cpp | 253 +++++++++++++++++++++++++++++++++ lib/Bitcode/Reader/CMakeLists.txt | 1 + 3 files changed, 254 insertions(+), 4 deletions(-) create mode 100644 lib/Bitcode/Reader/BitstreamReader.cpp (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 3bd64a9..fba8769 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This header defines the BitcodeReader class. -// -//===----------------------------------------------------------------------===// #include "llvm/Bitcode/ReaderWriter.h" #include "BitcodeReader.h" diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp new file mode 100644 index 0000000..abc78acc --- /dev/null +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -0,0 +1,253 @@ +//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/BitstreamReader.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// BitstreamCursor implementation +//===----------------------------------------------------------------------===// + +void BitstreamCursor::operator=(const BitstreamCursor &RHS) { + freeState(); + + BitStream = RHS.BitStream; + NextChar = RHS.NextChar; + CurWord = RHS.CurWord; + BitsInCurWord = RHS.BitsInCurWord; + CurCodeSize = RHS.CurCodeSize; + + // Copy abbreviations, and bump ref counts. + CurAbbrevs = RHS.CurAbbrevs; + for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); + i != e; ++i) + CurAbbrevs[i]->addRef(); + + // Copy block scope and bump ref counts. + BlockScope = RHS.BlockScope; + for (unsigned S = 0, e = static_cast(BlockScope.size()); + S != e; ++S) { + std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; + for (unsigned i = 0, e = static_cast(Abbrevs.size()); + i != e; ++i) + Abbrevs[i]->addRef(); + } +} + +void BitstreamCursor::freeState() { + // Free all the Abbrevs. + for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); + i != e; ++i) + CurAbbrevs[i]->dropRef(); + CurAbbrevs.clear(); + + // Free all the Abbrevs in the block scope. + for (unsigned S = 0, e = static_cast(BlockScope.size()); + S != e; ++S) { + std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; + for (unsigned i = 0, e = static_cast(Abbrevs.size()); + i != e; ++i) + Abbrevs[i]->dropRef(); + } + BlockScope.clear(); +} + +/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter +/// the block, and return true if the block has an error. +bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { + // Save the current block's state on BlockScope. + BlockScope.push_back(Block(CurCodeSize)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // Add the abbrevs specific to this block to the CurAbbrevs list. 
+ if (const BitstreamReader::BlockInfo *Info = + BitStream->getBlockInfo(BlockID)) { + for (unsigned i = 0, e = static_cast(Info->Abbrevs.size()); + i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + CurAbbrevs.back()->addRef(); + } + } + + // Get the codesize of this block. + CurCodeSize = ReadVBR(bitc::CodeLenWidth); + SkipToWord(); + unsigned NumWords = Read(bitc::BlockSizeWidth); + if (NumWordsP) *NumWordsP = NumWords; + + // Validate that this block is sane. + if (CurCodeSize == 0 || AtEndOfStream()) + return true; + + return false; +} + + +unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID, + SmallVectorImpl &Vals, + const char **BlobStart, unsigned *BlobLen){ + if (AbbrevID == bitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + Vals.push_back(ReadVBR64(6)); + return Code; + } + + const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + ReadAbbreviatedLiteral(Op, Vals); + continue; + } + + if (Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob) { + ReadAbbreviatedField(Op, Vals); + continue; + } + + if (Op.getEncoding() == BitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + ReadAbbreviatedField(EltEnc, Vals); + continue; + } + + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); + // Blob case. Read the number of bytes as a vbr6. + unsigned NumElts = ReadVBR(6); + SkipToWord(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. + size_t NewEnd = NextChar+((NumElts+3)&~3); + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (!canSkipToPos(NewEnd)) { + Vals.append(NumElts, 0); + NextChar = BitStream->getBitcodeBytes().getExtent(); + break; + } + + // Otherwise, read the number of bytes. If we can return a reference to + // the data, do so to avoid copying it. + if (BlobStart) { + *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer( + NextChar, NumElts); + *BlobLen = NumElts; + } else { + for (; NumElts; ++NextChar, --NumElts) + Vals.push_back(getByte(NextChar)); + } + // Skip over tail padding. + NextChar = NewEnd; + } + + unsigned Code = (unsigned)Vals[0]; + Vals.erase(Vals.begin()); + return Code; +} + + +void BitstreamCursor::ReadAbbrevRecord() { + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + unsigned NumOpInfo = ReadVBR(5); + for (unsigned i = 0; i != NumOpInfo; ++i) { + bool IsLiteral = Read(1) ? true : false; + if (IsLiteral) { + Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); + continue; + } + + BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); + if (BitCodeAbbrevOp::hasEncodingData(E)) + Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); + else + Abbv->Add(BitCodeAbbrevOp(E)); + } + CurAbbrevs.push_back(Abbv); +} + +bool BitstreamCursor::ReadBlockInfoBlock() { + // If this is the second stream to get to the block info block, skip it. 
+ if (BitStream->hasBlockInfoRecords()) + return SkipBlock(); + + if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; + + SmallVector Record; + BitstreamReader::BlockInfo *CurBlockInfo = 0; + + // Read all the records for this module. + while (1) { + unsigned Code = ReadCode(); + if (Code == bitc::END_BLOCK) + return ReadBlockEnd(); + if (Code == bitc::ENTER_SUBBLOCK) { + ReadSubBlockID(); + if (SkipBlock()) return true; + continue; + } + + // Read abbrev records, associate them with CurBID. + if (Code == bitc::DEFINE_ABBREV) { + if (!CurBlockInfo) return true; + ReadAbbrevRecord(); + + // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the + // appropriate BlockInfo. + BitCodeAbbrev *Abbv = CurAbbrevs.back(); + CurAbbrevs.pop_back(); + CurBlockInfo->Abbrevs.push_back(Abbv); + continue; + } + + // Read a record. + Record.clear(); + switch (ReadRecord(Code, Record)) { + default: break; // Default behavior, ignore unknown content. + case bitc::BLOCKINFO_CODE_SETBID: + if (Record.size() < 1) return true; + CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]); + break; + case bitc::BLOCKINFO_CODE_BLOCKNAME: { + if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. + std::string Name; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + Name += (char)Record[i]; + CurBlockInfo->Name = Name; + break; + } + case bitc::BLOCKINFO_CODE_SETRECORDNAME: { + if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. + std::string Name; + for (unsigned i = 1, e = Record.size(); i != e; ++i) + Name += (char)Record[i]; + CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0], + Name)); + break; + } + } + } +} + + diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt index dfe7e10..f614c9f 100644 --- a/lib/Bitcode/Reader/CMakeLists.txt +++ b/lib/Bitcode/Reader/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMBitReader BitReader.cpp BitcodeReader.cpp + BitstreamReader.cpp ) add_dependencies(LLVMBitReader intrinsics_gen) -- cgit v1.1 From 63246aa04f23767875bfe6f533285915a766b82d Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 19 Jan 2013 21:35:24 +0000 Subject: Add a new BitstreamEntry concept, and add two helper methods for walking through a BitstreamCursor that produce it: advance() and advanceSkippingSubblocks(), representing the two most common ways clients want to walk through bitcode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172919 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- lib/Bitcode/Reader/BitstreamReader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index fba8769..b14250b 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1737,7 +1737,7 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { // The ranlib in xcode 4 will align archive members by appending newlines // to the end of them. 
If this file size is a multiple of 4 but not 8, we // have to read and ignore these final 4 bytes :-( - if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 && + if (Stream.getAbbrevIDWidth() == 2 && Code == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) return false; diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index abc78acc..a5a7c0b 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -150,7 +150,7 @@ unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID, // the data, do so to avoid copying it. if (BlobStart) { *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer( - NextChar, NumElts); + NextChar, NumElts); *BlobLen = NumElts; } else { for (; NumElts; ++NextChar, --NumElts) -- cgit v1.1 From fe07db323b2b403236375af0a54b0ed2294cd0e6 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 19 Jan 2013 23:00:25 +0000 Subject: Fix a latent bug exposed by recent static member debug info changes. We weren't encoding boolean constants correctly due to modeling boolean as a signed type & then sign extending an i1 up to a byte & getting 255. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DebugInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 876aff8..b431184 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -383,7 +383,8 @@ bool DIType::isUnsignedDIType() { if (BTy.Verify()) { unsigned Encoding = BTy.getEncoding(); if (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char) + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_boolean) return true; } return false; -- cgit v1.1 From 00a312c478771941bc3e98cfbe6728465c769807 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 19 Jan 2013 23:14:09 +0000 Subject: Capitalize LowerVectorIntExtend to be consistent with all the other lower functions in this file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172927 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ca8cd74..6dbdd4e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6580,7 +6580,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Reduce a vector shuffle to zext. SDValue -X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { +X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // PMOVZX is only available from SSE41. if (!Subtarget->hasSSE41()) return SDValue(); @@ -6683,7 +6683,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { } // Check integer expanding shuffles. 
- SDValue NewOp = lowerVectorIntExtend(Op, DAG); + SDValue NewOp = LowerVectorIntExtend(Op, DAG); if (NewOp.getNode()) return NewOp; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 35b5abd..dc5caae 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -852,7 +852,7 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, -- cgit v1.1 From 657a99c608c98bb0cad655681c1da35ddd7b1418 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 19 Jan 2013 23:36:09 +0000 Subject: Use MVT instead of EVT in more of the shuffle lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172930 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6dbdd4e..f7b40f5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3586,7 +3586,7 @@ static bool isMOVLHPSMask(ArrayRef Mask, EVT VT) { static SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); DebugLoc dl = SVOp->getDebugLoc(); if (VT != MVT::v8i32 && VT != MVT::v8f32) @@ -4207,7 +4207,7 @@ bool X86::isZeroNode(SDValue Elt) { /// their permute mask. static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); SmallVector MaskVec; @@ -5656,8 +5656,8 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); if (!Subtarget->hasSSE41() || EltVT == MVT::i8) @@ -6088,7 +6088,7 @@ static SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -6214,14 +6214,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { if (NewOp.getNode()) return NewOp; - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; DebugLoc dl = SVOp->getDebugLoc(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); + MVT EltVT = VT.getVectorElementType(); + MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); SDValue Output[2]; SmallVector Mask; @@ -6326,7 +6326,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); assert(VT.is128BitVector() && 
"Unsupported vector size"); @@ -6656,7 +6656,7 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -6701,7 +6701,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(V2.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) { - EVT NewVT = NewOp.getValueType(); + MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isCommutedMOVLMask(cast(NewOp)->getMask(), NewVT, true, false)) return getVZextMovL(VT, NewVT, NewOp.getOperand(0), @@ -6710,7 +6710,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) { - EVT NewVT = NewOp.getValueType(); + MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isMOVLMask(cast(NewOp)->getMask(), NewVT)) return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget, dl); @@ -6725,7 +6725,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; @@ -6816,7 +6816,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } @@ -6855,7 +6855,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isShift) { // No better options. Use a vshldq / vsrldq. - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } -- cgit v1.1 From f9147c41d8101dbd98662d6d7be78278d53f690f Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 20 Jan 2013 00:00:00 +0000 Subject: move some private methods out of line, add a skipRecord() method. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172931 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 114 ++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index a5a7c0b..be70f52 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -89,6 +89,114 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { return false; } +void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, + SmallVectorImpl &Vals) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, use it. 
+ Vals.push_back(Op.getLiteralValue()); +} + +void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, + SmallVectorImpl &Vals) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + case BitCodeAbbrevOp::Array: + case BitCodeAbbrevOp::Blob: + assert(0 && "Should not reach here"); + case BitCodeAbbrevOp::Fixed: + Vals.push_back(Read((unsigned)Op.getEncodingData())); + break; + case BitCodeAbbrevOp::VBR: + Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); + break; + case BitCodeAbbrevOp::Char6: + Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); + break; + } +} + +void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + case BitCodeAbbrevOp::Array: + case BitCodeAbbrevOp::Blob: + assert(0 && "Should not reach here"); + case BitCodeAbbrevOp::Fixed: + (void)Read((unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::VBR: + (void)ReadVBR64((unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::Char6: + (void)Read(6); + break; + } +} + + + +/// skipRecord - Read the current record and discard it. +void BitstreamCursor::skipRecord(unsigned AbbrevID) { + // Skip unabbreviated records by reading past their entries. + if (AbbrevID == bitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + (void)Code; + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + (void)ReadVBR64(6); + return; + } + + const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) + continue; + + if (Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob) { + skipAbbreviatedField(Op); + continue; + } + + if (Op.getEncoding() == BitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + skipAbbreviatedField(EltEnc); + continue; + } + + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); + // Blob case. Read the number of bytes as a vbr6. + unsigned NumElts = ReadVBR(6); + SkipToWord(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. + size_t NewEnd = NextChar+((NumElts+3)&~3); + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (!canSkipToPos(NewEnd)) { + NextChar = BitStream->getBitcodeBytes().getExtent(); + break; + } + + // Skip over the blob. 
+ NextChar = NewEnd; + } +} unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID, SmallVectorImpl &Vals, @@ -106,13 +214,13 @@ unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID, for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { - ReadAbbreviatedLiteral(Op, Vals); + readAbbreviatedLiteral(Op, Vals); continue; } if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { - ReadAbbreviatedField(Op, Vals); + readAbbreviatedField(Op, Vals); continue; } @@ -126,7 +234,7 @@ unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID, // Read all the elements. for (; NumElts; --NumElts) - ReadAbbreviatedField(EltEnc, Vals); + readAbbreviatedField(EltEnc, Vals); continue; } -- cgit v1.1 From 45e1c758338b0b2747d76d1e47bdce7c4d75dd56 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 00:38:18 +0000 Subject: Use MVT instead of EVT in more instruction lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172933 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f7b40f5..e30d25a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4390,7 +4390,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately. /// Then bitcast to their original type, ensuring they get CSE'd. -static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, +static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -5100,7 +5100,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasFp256()) return SDValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); assert((VT.is128BitVector() || VT.is256BitVector()) && @@ -5298,8 +5298,8 @@ SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT ExtVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT ExtVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); // Vectors containing all zeros can be matched by pxor and xorps later @@ -5630,7 +5630,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // to create 256-bit vectors from two other 128-bit ones. 
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); + MVT ResVT = Op.getValueType().getSimpleVT(); assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); @@ -7038,10 +7038,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); - if (!Op.getOperand(0).getValueType().is128BitVector()) + if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -7106,7 +7106,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); + MVT VecVT = Vec.getValueType().getSimpleVT(); // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. @@ -7133,7 +7133,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Res; } - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { @@ -7146,7 +7146,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. - EVT EltVT = MVT::i32; + MVT EltVT = MVT::i32; SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, @@ -7161,7 +7161,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // SHUFPS the element to the lowest double word, then movss. int Mask[4] = { static_cast(Idx), -1, -1, -1 }; - EVT VVT = Op.getOperand(0).getValueType(); + MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7180,7 +7180,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
int Mask[2] = { 1, -1 }; - EVT VVT = Op.getOperand(0).getValueType(); + MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7193,8 +7193,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); @@ -7247,8 +7247,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); @@ -7296,7 +7296,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT OpVT = Op.getValueType(); + MVT OpVT = Op.getValueType().getSimpleVT(); // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. -- cgit v1.1 From 3b2aba09e2534a23ab6c50d9f60d1d7d9ff59eb0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 00:43:42 +0000 Subject: Remove DebugLoc argument from static function. It can easily be obtained from the SVOp passed in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172935 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e30d25a..59a25ff 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6135,8 +6135,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, /// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15> static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG) { MVT VT = SVOp->getValueType(0).getSimpleVT(); + DebugLoc dl = SVOp->getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); MVT NewVT; unsigned Scale; @@ -6691,7 +6692,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 || VT == MVT::v32i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); } else if ((VT == MVT::v4i32 || @@ -6699,7 +6700,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. 
if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isCommutedMOVLMask(cast(NewOp)->getMask(), @@ -6708,7 +6709,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isMOVLMask(cast(NewOp)->getMask(), NewVT)) -- cgit v1.1 From f84b7500ce489d2e4039348ed30bf584f0b61973 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 00:50:58 +0000 Subject: Make some helper methods static. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 +++------- lib/Target/X86/X86ISelLowering.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 59a25ff..ec9f675 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6173,7 +6173,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, /// getVZextMovL - Return a zero-extending vector move low node. /// -static SDValue getVZextMovL(EVT VT, EVT OpVT, +static SDValue getVZextMovL(MVT VT, EVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { @@ -7036,9 +7036,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); @@ -7191,9 +7189,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); } -SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType().getSimpleVT(); MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dc5caae..c1f940a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -794,9 +794,7 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, -- cgit v1.1 From 194ef24dfedf62642c853a851db4d7e528d27460 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 20 Jan 2013 01:06:48 +0000 Subject: stringref'ize readRecord and properly capitalize it. 
Add a compatibility method to ease the transition.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172940 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Bitcode/Reader/BitstreamReader.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index be70f52..84d5ca6 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -198,9 +198,9 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
   }
 }
 
-unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID,
+unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
                                      SmallVectorImpl<uint64_t> &Vals,
-                                     const char **BlobStart, unsigned *BlobLen){
+                                     StringRef *Blob) {
   if (AbbrevID == bitc::UNABBREV_RECORD) {
     unsigned Code = ReadVBR(6);
     unsigned NumElts = ReadVBR(6);
@@ -256,10 +256,11 @@ unsigned BitstreamCursor::ReadRecord(unsigned AbbrevID,
 
     // Otherwise, read the number of bytes.  If we can return a reference to
     // the data, do so to avoid copying it.
-    if (BlobStart) {
-      *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
-                                                            NextChar, NumElts);
-      *BlobLen = NumElts;
+    if (Blob) {
+      *Blob =
+        StringRef((const char*)BitStream->getBitcodeBytes().getPointer(
+                                                          NextChar, NumElts),
+                  NumElts);
     } else {
       for (; NumElts; ++NextChar, --NumElts)
         Vals.push_back(getByte(NextChar));
--
cgit v1.1


From 14268416720155d63f190a6143ee40b3b850e409 Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Sun, 20 Jan 2013 01:18:01 +0000
Subject: The last of PR14471 - emission of constant floats

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172941 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 20 ++++++++++++++++----
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h   |  3 +++
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 255e083..2e4ed5d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -628,10 +628,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
   return true;
 }
 
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+  return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+}
+
 /// addConstantValue - Add constant value entry in variable DIE.
 bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
                                    bool Unsigned) {
-  unsigned CIBitWidth = CI->getBitWidth();
+  return addConstantValue(Die, CI->getValue(), Unsigned);
+}
+
+// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
+                                   bool Unsigned) {
+  unsigned CIBitWidth = Val.getBitWidth();
   if (CIBitWidth <= 64) {
     unsigned form = 0;
     switch (CIBitWidth) {
@@ -643,16 +654,15 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
       form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
     }
     if (Unsigned)
-      addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+      addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
     else
-      addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+      addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
     return true;
   }
 
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
   // Get the raw data form of the large APInt.
- const APInt Val = CI->getValue(); const uint64_t *Ptr64 = Val.getRawData(); int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. @@ -1697,6 +1707,8 @@ DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { if (const ConstantInt *CI = dyn_cast_or_null(DT.getConstant())) addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + if (const ConstantFP *CFP = dyn_cast_or_null(DT.getConstant())) + addConstantFPValue(StaticMemberDIE, CFP); insertDIE(DT, StaticMemberDIE); return StaticMemberDIE; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 1091bce..c7662f9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -27,6 +27,7 @@ class DwarfUnits; class MachineLocation; class MachineOperand; class ConstantInt; +class ConstantFP; class DbgVariable; //===----------------------------------------------------------------------===// @@ -250,9 +251,11 @@ public: /// addConstantValue - Add constant value entry in variable DIE. bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); -- cgit v1.1 From 5a4251c767adb7a47ad7a53719398ee1342cc400 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 20 Jan 2013 02:13:19 +0000 Subject: convert the bitstream reader itself and the IR .bc file parser to use the new advance() APIs, simplifying things and making a bunch of details more private to BitstreamCursor. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172947 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 438 +++++++++++++++------------------ lib/Bitcode/Reader/BitstreamReader.cpp | 24 +- 2 files changed, 214 insertions(+), 248 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index b14250b..219fc18 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -441,29 +441,22 @@ bool BitcodeReader::ParseAttributeBlock() { // Read all the records. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of PARAMATTR block"); + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("Error at end of PARAMATTR block"); + case BitstreamEntry::EndBlock: return false; + case BitstreamEntry::Record: + // The interesting case. + break; } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - + // Read a record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...] 
@@ -509,32 +502,26 @@ bool BitcodeReader::ParseTypeTableBody() { // Read all the records for this type table. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + Error("Error in the type table block"); + return true; + case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) return Error("Invalid type forward reference in TYPE_BLOCK"); - if (Stream.ReadBlockEnd()) - return Error("Error at end of type table block"); return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. Record.clear(); Type *ResultTy = 0; - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: return Error("unknown type in type table"); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the @@ -732,28 +719,22 @@ bool BitcodeReader::ParseValueSymbolTable() { // Read all the records for this value table. SmallString<128> ValueName; while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of value symbol table block"); + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed value symbol table block"); + case BitstreamEntry::EndBlock: return false; - } - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] @@ -793,30 +774,24 @@ bool BitcodeReader::ParseMetadata() { // Read all the records. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of PARAMATTR block"); + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + Error("malformed metadata block"); + return true; + case BitstreamEntry::EndBlock: return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } bool IsFunctionLocal = false; // Read a record. 
Record.clear(); - Code = Stream.ReadRecord(Code, Record); + unsigned Code = Stream.readRecord(Entry.ID, Record); switch (Code) { default: // Default behavior: ignore. break; @@ -827,7 +802,7 @@ bool BitcodeReader::ParseMetadata() { Code = Stream.ReadCode(); // METADATA_NAME is always followed by METADATA_NAMED_NODE. - unsigned NextBitCode = Stream.ReadRecord(Code, Record); + unsigned NextBitCode = Stream.readRecord(Code, Record); assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode; // Read named metadata elements. @@ -954,27 +929,29 @@ bool BitcodeReader::ParseConstants() { Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed block record in AST file"); + case BitstreamEntry::EndBlock: + if (NextCstNo != ValueList.size()) + return Error("Invalid constant reference!"); + + // Once all the constants have been read, go through and resolve forward + // references. + ValueList.ResolveConstantForwardRefs(); + return false; + case BitstreamEntry::Record: + // The interesting case. break; - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; } // Read a record. Record.clear(); Value *V = 0; - unsigned BitCode = Stream.ReadRecord(Code, Record); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: unknown constant case bitc::CST_CODE_UNDEF: // UNDEF @@ -1329,17 +1306,6 @@ bool BitcodeReader::ParseConstants() { ValueList.AssignValue(V, NextCstNo); ++NextCstNo; } - - if (NextCstNo != ValueList.size()) - return Error("Invalid constant reference!"); - - if (Stream.ReadBlockEnd()) - return Error("Error at end of constants block"); - - // Once all the constants have been read, go through and resolve forward - // references. - ValueList.ResolveConstantForwardRefs(); - return false; } bool BitcodeReader::ParseUseLists() { @@ -1350,29 +1316,22 @@ bool BitcodeReader::ParseUseLists() { // Read all the records. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of use-list table block"); + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed use list block"); + case BitstreamEntry::EndBlock: return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a use list record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. 
@@ -1444,17 +1403,18 @@ bool BitcodeReader::ParseModule(bool Resume) { std::vector GCTable; // Read all the records for this module. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of module block"); - + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + Error("malformed module block"); + return true; + case BitstreamEntry::EndBlock: return GlobalCleanup(); - } - - if (Code == bitc::ENTER_SUBBLOCK) { - switch (Stream.ReadSubBlockID()) { + + case BitstreamEntry::SubBlock: + switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return Error("Malformed block record"); @@ -1493,7 +1453,7 @@ bool BitcodeReader::ParseModule(bool Resume) { return true; SeenFirstFunctionBody = true; } - + if (RememberAndSkipFunctionBody()) return true; // For streaming bitcode, suspend parsing when we reach the function @@ -1513,15 +1473,15 @@ bool BitcodeReader::ParseModule(bool Resume) { break; } continue; + + case BitstreamEntry::Record: + // The interesting case. + break; } - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } // Read a record. - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) @@ -1709,8 +1669,6 @@ bool BitcodeReader::ParseModule(bool Resume) { } Record.clear(); } - - return Error("Premature end of bitstream"); } bool BitcodeReader::ParseBitcodeInto(Module *M) { @@ -1729,47 +1687,55 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); - - if (Code != bitc::ENTER_SUBBLOCK) { - - // The ranlib in xcode 4 will align archive members by appending newlines + while (1) { + if (Stream.AtEndOfStream()) + return false; + + BitstreamEntry Entry = + Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + Error("malformed module file"); + return true; + case BitstreamEntry::EndBlock: + return false; + + case BitstreamEntry::SubBlock: + switch (Entry.ID) { + case bitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case bitc::MODULE_BLOCK_ID: + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule(false)) + return true; + if (LazyStreamer) return false; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + continue; + case BitstreamEntry::Record: + // There should be no records in the top-level of blocks. + + // The ranlib in Xcode 4 will align archive members by appending newlines // to the end of them. If this file size is a multiple of 4 but not 8, we // have to read and ignore these final 4 bytes :-( - if (Stream.getAbbrevIDWidth() == 2 && Code == 2 && + if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) return false; - + return Error("Invalid record at top-level"); } - - unsigned BlockID = Stream.ReadSubBlockID(); - - // We only know the MODULE subblock ID. 
- switch (BlockID) { - case bitc::BLOCKINFO_BLOCK_ID: - if (Stream.ReadBlockInfoBlock()) - return Error("Malformed BlockInfoBlock"); - break; - case bitc::MODULE_BLOCK_ID: - // Reject multiple MODULE_BLOCK's in a single bitstream. - if (TheModule) - return Error("Multiple MODULE_BLOCKs in same stream"); - TheModule = M; - if (ParseModule(false)) - return true; - if (LazyStreamer) return false; - break; - default: - if (Stream.SkipBlock()) - return Error("Malformed block record"); - break; - } } - - return false; } bool BitcodeReader::ParseModuleTriple(std::string &Triple) { @@ -1779,32 +1745,22 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { SmallVector Record; // Read all the records for this module. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of module block"); - + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed module block"); + case BitstreamEntry::EndBlock: return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - switch (Stream.ReadSubBlockID()) { - default: // Skip unknown content. - if (Stream.SkipBlock()) - return Error("Malformed block record"); - break; - } - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; @@ -1816,8 +1772,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { } Record.clear(); } - - return Error("Premature end of bitstream"); } bool BitcodeReader::ParseTriple(std::string &Triple) { @@ -1834,28 +1788,32 @@ bool BitcodeReader::ParseTriple(std::string &Triple) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); - - if (Code != bitc::ENTER_SUBBLOCK) - return Error("Invalid record at top-level"); - - unsigned BlockID = Stream.ReadSubBlockID(); - - // We only know the MODULE subblock ID. - switch (BlockID) { - case bitc::MODULE_BLOCK_ID: - if (ParseModuleTriple(Triple)) + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + Error("malformed module file"); + return true; + case BitstreamEntry::EndBlock: + return false; + + case BitstreamEntry::SubBlock: + if (Entry.ID == bitc::MODULE_BLOCK_ID) + return ParseModuleTriple(Triple); + + // Ignore other sub-blocks. + if (Stream.SkipBlock()) { + Error("malformed block record in AST file"); return true; - break; - default: - if (Stream.SkipBlock()) - return Error("Malformed block record"); - break; + } + continue; + + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; } } - - return false; } /// ParseMetadataAttachment - Parse metadata attachments. 
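ParseTriple above is the pattern to copy when only one piece of a module is wanted: scan the top level, descend into the single interesting block, and skip everything else undecoded. A sketch of that scan as a generic helper; findTopLevelBlock is a hypothetical name, not an existing LLVM function:

  // Position Stream inside the first top-level block with the given ID.
  // Returns false on end of stream or a malformed entry.
  static bool findTopLevelBlock(BitstreamCursor &Stream, unsigned BlockID) {
    while (1) {
      BitstreamEntry Entry = Stream.advance();
      switch (Entry.Kind) {
      case BitstreamEntry::Error:
      case BitstreamEntry::EndBlock:
        return false;
      case BitstreamEntry::SubBlock:
        if (Entry.ID == BlockID)
          return !Stream.EnterSubBlock(BlockID);
        if (Stream.SkipBlock())
          return false;               // Malformed block record.
        continue;
      case BitstreamEntry::Record:
        Stream.skipRecord(Entry.ID);  // Skip without decoding operands.
        continue;
      }
    }
  }

The skipRecord call is the detail worth copying: unlike readRecord it does not build the operand vector, so records in uninteresting positions cost almost nothing.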
@@ -1864,20 +1822,23 @@ bool BitcodeReader::ParseMetadataAttachment() { return Error("Malformed block record"); SmallVector Record; - while(1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of PARAMATTR block"); + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed metadata block"); + case BitstreamEntry::EndBlock: + return false; + case BitstreamEntry::Record: + // The interesting case. break; } - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } + // Read a metadata attachment record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::METADATA_ATTACHMENT: { @@ -1898,7 +1859,6 @@ bool BitcodeReader::ParseMetadataAttachment() { } } } - return false; } /// ParseFunctionBody - Lazily parse the specified function body block. @@ -1923,15 +1883,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { // Read all the records. SmallVector Record; while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of function block"); - break; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - switch (Stream.ReadSubBlockID()) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + return Error("Bitcode error in function block"); + case BitstreamEntry::EndBlock: + goto OutOfRecordLoop; + + case BitstreamEntry::SubBlock: + switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return Error("Malformed block record"); @@ -1951,17 +1912,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } continue; + + case BitstreamEntry::Record: + // The interesting case. + break; } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - + // Read a record. Record.clear(); Instruction *I = 0; - unsigned BitCode = Stream.ReadRecord(Code, Record); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return Error("Unknown instruction"); @@ -2738,6 +2698,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { ValueList.AssignValue(I, NextValueNo++); } +OutOfRecordLoop: + // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (A->getParent() == 0) { diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 84d5ca6..83df57b 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -306,17 +306,21 @@ bool BitstreamCursor::ReadBlockInfoBlock() { // Read all the records for this module. while (1) { - unsigned Code = ReadCode(); - if (Code == bitc::END_BLOCK) - return ReadBlockEnd(); - if (Code == bitc::ENTER_SUBBLOCK) { - ReadSubBlockID(); - if (SkipBlock()) return true; - continue; - } + BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); + switch (Entry.Kind) { + case llvm::BitstreamEntry::SubBlock: // Handled for us already. + case llvm::BitstreamEntry::Error: + return true; + case llvm::BitstreamEntry::EndBlock: + return false; + case llvm::BitstreamEntry::Record: + // The interesting case. 
+ break; + } + // Read abbrev records, associate them with CurBID. - if (Code == bitc::DEFINE_ABBREV) { + if (Entry.ID == bitc::DEFINE_ABBREV) { if (!CurBlockInfo) return true; ReadAbbrevRecord(); @@ -330,7 +334,7 @@ bool BitstreamCursor::ReadBlockInfoBlock() { // Read a record. Record.clear(); - switch (ReadRecord(Code, Record)) { + switch (readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::BLOCKINFO_CODE_SETBID: if (Record.size() < 1) return true; -- cgit v1.1 From 1ca114a66b666f932741d00d74636dc35ea1d466 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 20 Jan 2013 02:54:05 +0000 Subject: trivial micro-optimization: lazily call the virtual method instead of eagerly calling it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172953 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 219fc18..00474ec 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -796,7 +796,7 @@ bool BitcodeReader::ParseMetadata() { default: // Default behavior: ignore. break; case bitc::METADATA_NAME: { - // Read named of the named metadata. + // Read name of the named metadata. SmallString<8> Name(Record.begin(), Record.end()); Record.clear(); Code = Stream.ReadCode(); -- cgit v1.1 From 0bbbc52dc8f1d7d41ab06e0d84daf990c8bc7f93 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 20 Jan 2013 05:24:29 +0000 Subject: LoopVectorizer: Implement a new heuristics for selecting the unroll factor. We ignore the cpu frontend and focus on pipeline utilization. We do this because we don't have a good way to estimate the loop body size at the IR level. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172964 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 87 ++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index fec1573..bb8b428 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -106,9 +106,6 @@ static const unsigned TinyTripCountVectorThreshold = 16; /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -/// We don't unroll loops that are larget than this threshold. -static const unsigned MaxLoopSizeThreshold = 32; - /// When performing a runtime memory check, do not check more than this /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; @@ -514,11 +511,12 @@ public: const TargetTransformInfo &TTI) : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {} - /// \return The most profitable vectorization factor. + /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to VF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF); + std::pair + selectVectorizationFactor(bool OptForSize, unsigned UserVF); /// \returns The size (in bits) of the widest type in the code that /// needs to be vectorized. 
We ignore values that remain scalar such as @@ -528,7 +526,10 @@ public: /// \return The most profitable unroll factor. /// If UserUF is non-zero then this method finds the best unroll-factor /// based on register pressure and other parameters. - unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF); + /// VF and LoopCost are the selected vectorization factor and the cost of the + /// selected VF. + unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF, + unsigned LoopCost); /// \brief A struct that represents some properties of the register usage /// of a loop. @@ -626,8 +627,13 @@ struct LoopVectorize : public LoopPass { return false; } - unsigned VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); - unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll); + // Select the optimal vectorization factor. + std::pair VFPair; + VFPair = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); + // Select the unroll factor. + unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll, + VFPair.first, VFPair.second); + unsigned VF = VFPair.first; if (VF == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); @@ -2633,12 +2639,12 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { return AR->isAffine(); } -unsigned +std::pair LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, unsigned UserVF) { if (OptForSize && Legal->getRuntimePointerCheck()->Need) { DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); - return 1; + return std::make_pair(1U, 0U); } // Find the trip count. @@ -2657,7 +2663,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, } assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements" - " into one vector."); + " into one vector!"); unsigned VF = MaxVectorSize; @@ -2666,7 +2672,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // If we are unable to calculate the trip count then don't try to vectorize. if (TC < 2) { DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); - return 1; + return std::make_pair(1U, 0U); } // Find the maximum SIMD width that can fit within the trip count. @@ -2679,7 +2685,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // zero then we require a tail. if (VF < 2) { DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); - return 1; + return std::make_pair(1U, 0U); } } @@ -2687,7 +2693,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); DEBUG(dbgs() << "LV: Using user VF "<(Width, VF * Cost); } unsigned LoopVectorizationCostModel::getWidestType() { @@ -2748,7 +2754,24 @@ unsigned LoopVectorizationCostModel::getWidestType() { unsigned LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, - unsigned UserUF) { + unsigned UserUF, + unsigned VF, + unsigned LoopCost) { + + // -- The unroll heuristics -- + // We unroll the loop in order to expose ILP and reduce the loop overhead. + // There are many micro-architectural considerations that we can't predict + // at this level. For example frontend pressure (on decode or fetch) due to + // code size, or the number and capabilities of the execution ports. + // + // We use the following heuristics to select the unroll factor: + // 1. If the code has reductions the we unroll in order to break the cross + // iteration dependency. + // 2. 
If the loop is really small then we unroll in order to reduce the loop + // overhead. + // 3. We don't unroll if we think that we will spill registers to memory due + // to the increased register pressure. + // Use the user preference, unless 'auto' is selected. if (UserUF != 0) return UserUF; @@ -2781,19 +2804,39 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, // fit without causing spills. unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers; - // We don't want to unroll the loops to the point where they do not fit into - // the decoded cache. Assume that we only allow 32 IR instructions. - UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions)); - // Clamp the unroll factor ranges to reasonable factors. unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor(); - + + // If we did not calculate the cost for VF (because the user selected the VF) + // then we calculate the cost of VF here. + if (LoopCost == 0) + LoopCost = expectedCost(VF); + + // Clamp the calculated UF to be between the 1 and the max unroll factor + // that the target allows. if (UF > MaxUnrollSize) UF = MaxUnrollSize; else if (UF < 1) UF = 1; - return UF; + if (Legal->getReductionVars()->size()) { + DEBUG(dbgs() << "LV: Unrolling because of reductions. \n"); + return UF; + } + + // We want to unroll tiny loops in order to reduce the loop overhead. + // We assume that the cost overhead is 1 and we use the cost model + // to estimate the cost of the loop and unroll until the cost of the + // loop overhead is about 5% of the cost of the loop. + DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n"); + if (LoopCost < 20) { + DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n"); + unsigned NewUF = 20/LoopCost + 1; + return std::min(NewUF, UF); + } + + DEBUG(dbgs() << "LV: Not Unrolling. \n"); + return 1; } LoopVectorizationCostModel::RegisterUsage -- cgit v1.1 From 0c8607ba6a21578996a7532b9390afba13bd2087 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 20 Jan 2013 08:35:56 +0000 Subject: Revert 172708. The optimization handles esoteric cases but adds a lot of complexity both to the X86 backend and to other backends. This optimization disables an important canonicalization of chains of SEXT nodes and makes SEXT and ZEXT asymmetrical. Disabling the canonicalization of consecutive SEXT nodes into a single node disables other DAG optimizations that assume that there is only one SEXT node. The AVX mask optimizations is one example. Additionally this optimization does not update the cost model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172968 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++------------- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +++- lib/Target/X86/X86ISelLowering.cpp | 28 ++-------------------------- 3 files changed, 10 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3e5a446..a82410a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4298,19 +4298,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (isa(N0)) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); - // Folding (sext (sext x)) is obvious, but we do it only after the type - // legalization phase. 
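(Concretely, the fold this removed comment describes collapses nested sign extensions, which in IR terms is

  %a = sext i16 %x to i32
  %b = sext i32 %a to i64
    ; folds to:
  %b = sext i16 %x to i64

and the revert makes that fold unconditional again; the rest of the removed comment explains why r172708 had deferred it until after type legalization.)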
When the sequence is like {(T1->T2), (T2->T3)} and - // T1 or T3 (or the both) are illegal types, the TypeLegalizer may not - // give a good sequence for the (T1->T3) pair. - // So we give a chance to target specific combiner to optimize T1->T2 and T2->T3 - // separately and may be fold them in a preceding of subsequent instruction. - if (Level >= AfterLegalizeTypes) { - // fold (sext (sext x)) -> (sext x) - // fold (sext (aext x)) -> (sext x) - if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, - N0.getOperand(0)); - } + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 91491bf..344d144 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2554,7 +2554,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::UNDEF) + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. return getConstant(0, VT); break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ec9f675..c2b209d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17000,38 +17000,14 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); - - if (!VT.isVector()) - return SDValue(); - - SDValue In = N->getOperand(0); - EVT InVT = In.getValueType(); - DebugLoc dl = N->getDebugLoc(); - unsigned ExtendedEltSize = VT.getVectorElementType().getSizeInBits(); - - // Split SIGN_EXTEND operation to use vmovsx instruction when possible - if (InVT == MVT::v8i8) { - if (ExtendedEltSize > 16 && !Subtarget->hasInt256()) - In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, In); - if (ExtendedEltSize > 32) - In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i32, In); - return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); - } - - if ((InVT == MVT::v4i8 || InVT == MVT::v4i16) && - ExtendedEltSize > 32 && !Subtarget->hasInt256()) { - In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); - return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, In); - } - if (!DCI.isBeforeLegalizeOps()) return SDValue(); if (!Subtarget->hasFp256()) return SDValue(); - if (VT.is256BitVector()) { + EVT VT = N->getValueType(0); + if (VT.isVector() && VT.getSizeInBits() == 256) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; -- cgit v1.1 From 26827f3dc5fc2e614fb2409e5371de6132eaa8a7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 09:02:22 +0000 Subject: Make LowerVSETCC a static function and use MVT instead of EVT. 
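A note on the EVT / MVT distinction this change leans on: EVT can describe arbitrary extended value types (i42, v3i31, ...) that only exist before type legalization, while MVT is the closed enum of simple machine types. EVT::getSimpleVT() asserts that the type is simple, so a change like

  MVT VT = Op.getValueType().getSimpleVT();

is not merely a narrower spelling: it turns the assumption that these lowering helpers only run on legal, simple types into an enforced invariant.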
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172969 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 127 +++++++++++++++++++------------------ lib/Target/X86/X86ISelLowering.h | 1 - 2 files changed, 65 insertions(+), 63 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c2b209d..8f6005f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9136,65 +9136,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, return SDValue(); } -SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - - if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); - - assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - - // Optimize to BT if possible. - // Lower (X & (1 << N)) == 0 to BT(X, N). - // Lower ((X >>u N) & 1) != 0 to BT(X, N). - // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast(Op1)->isNullValue() && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); - if (NewSetCC.getNode()) - return NewSetCC; - } - - // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of - // these. - if (Op1.getOpcode() == ISD::Constant && - (cast(Op1)->getZExtValue() == 1 || - cast(Op1)->isNullValue()) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - - // If the input is a setcc, then reuse the input setcc or use a new one with - // the inverted condition. - if (Op0.getOpcode() == X86ISD::SETCC) { - X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast(Op1)->isNullValue(); - if (!Invert) return Op0; - - CCode = X86::GetOppositeBranchCondition(CCode); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); - } - } - - bool isFP = Op1.getValueType().isFloatingPoint(); - unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); - if (X86CC == X86::COND_INVALID) - return SDValue(); - - SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); - EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAGS); -} - // Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 // ones, and then concatenate the result back. 
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); @@ -9214,26 +9159,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); // Issue the operation on the smaller types and concatenate the result back - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } -SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDValue Cond; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); ISD::CondCode SetCCOpcode = cast(CC)->get(); - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint(); DebugLoc dl = Op.getDebugLoc(); if (isFP) { #ifndef NDEBUG - EVT EltVT = Op0.getValueType().getVectorElementType(); + MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT(); assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif @@ -9374,6 +9320,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { return Result; } +SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + + MVT VT = Op.getValueType().getSimpleVT(); + + if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); + + assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + + // Optimize to BT if possible. + // Lower (X & (1 << N)) == 0 to BT(X, N). + // Lower ((X >>u N) & 1) != 0 to BT(X, N). + // Lower ((X >>s N) & 1) != 0 to BT(X, N). + if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant && + cast(Op1)->isNullValue() && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; + } + + // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of + // these. + if (Op1.getOpcode() == ISD::Constant && + (cast(Op1)->getZExtValue() == 1 || + cast(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + // If the input is a setcc, then reuse the input setcc or use a new one with + // the inverted condition. 
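(A concrete instance of the comment just above, traced through the code that follows: if Op0 is already an X86ISD::SETCC node carrying condition E and flags %f, then

  (Op0 != 0)  -->  Op0                            // Invert is false
  (Op0 == 0)  -->  (X86ISD::SETCC COND_NE, %f)    // opposite condition

so no compare against zero is ever materialized. None of this logic changes here; LowerSETCC is only being reordered after the now-static LowerVSETCC.)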
+ if (Op0.getOpcode() == X86ISD::SETCC) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast(Op1)->isNullValue(); + if (!Invert) return Op0; + + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + } + + bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint(); + unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); + if (X86CC == X86::COND_INVALID) + return SDValue(); + + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); + EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), EFLAGS); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c1f940a..6856016 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -822,7 +822,6 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From e6e7606118d6862747a84ecfb0d70f3c0cf601af Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 20 Jan 2013 09:39:17 +0000 Subject: Fix a build error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172971 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index bb8b428..3013c2d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1593,7 +1593,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // optimizations will clean it up. VectorParts Cond = createEdgeMask(P->getIncomingBlock(0), P->getParent()); - + for (unsigned part = 0; part < UF; ++part) { VectorParts &In0 = getVectorValue(P->getIncomingValue(0)); VectorParts &In1 = getVectorValue(P->getIncomingValue(1)); @@ -2713,7 +2713,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, } DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n"); - return std::make_pair(Width, VF * Cost); + unsigned LoopCost = VF * Cost; + return std::make_pair(Width, LoopCost); } unsigned LoopVectorizationCostModel::getWidestType() { -- cgit v1.1 From 62b8786d12ceacafd665d4a1fbb6e90af0ec368c Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 20 Jan 2013 17:18:47 +0000 Subject: Add instruction encodings / disassembly support 3r instructions. It is not possible to distinguish 3r instructions from 2r / rus instructions using only the fixed bits. Therefore if an instruction doesn't match the 2r / rus format try to decode it as a 3r instruction before returning Fail. 
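The disambiguation is safe because the two formats partition the 5-bit field at bits 6..10 (Combined in the decoder): 3r instructions pack the high parts of their three operands as Combined = Op3High * 9 + Op2High * 3 + Op1High, which only covers 0..26, while 2r / rus encodings use 27..31 plus a bit-5 extension for the values 32..35. Decode2OpInstruction fails on everything below 27, which is exactly the range Decode3OpInstruction accepts, so the fallback can never steal a genuine 2r instruction. A worked 3r decode, assuming bits 6..10 hold 22 and the three low 2-bit operand fields hold 1, 2 and 3:

  Combined = 22                 // < 27, so a 3-operand encoding
  Op1High = 22 % 3       == 1   =>  Op1 = (1 << 2) | 1 == 5
  Op2High = (22 / 3) % 3 == 1   =>  Op2 = (1 << 2) | 2 == 6
  Op3High = 22 / 9       == 2   =>  Op3 = (2 << 2) | 3 == 11

i.e. the instruction names registers r5, r6 and r11.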
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172984 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 150 ++++++++++++++++----- lib/Target/XCore/XCoreInstrFormats.td | 4 +- lib/Target/XCore/XCoreInstrInfo.td | 98 ++++++-------- 3 files changed, 167 insertions(+), 85 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 094f18c..baa9566 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -132,6 +132,11 @@ static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus Decode3RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -159,11 +164,15 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, static DecodeStatus Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) { - unsigned Combined = fieldFromInstruction(Insn, 6, 5) + - fieldFromInstruction(Insn, 5, 1) * 5 - 27; - if (Combined >= 9) + unsigned Combined = fieldFromInstruction(Insn, 6, 5); + if (Combined < 27) return MCDisassembler::Fail; - + if (fieldFromInstruction(Insn, 5, 1)) { + if (Combined == 31) + return MCDisassembler::Fail; + Combined += 5; + } + Combined -= 27; unsigned Op1High = Combined % 3; unsigned Op2High = Combined / 3; Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2); @@ -172,14 +181,77 @@ Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) { } static DecodeStatus +Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2, + unsigned &Op3) { + unsigned Combined = fieldFromInstruction(Insn, 6, 5); + if (Combined >= 27) + return MCDisassembler::Fail; + + unsigned Op1High = Combined % 3; + unsigned Op2High = (Combined / 3) % 3; + unsigned Op3High = Combined / 9; + Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 4, 2); + Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 2, 2); + Op3 = (Op3High << 2) | fieldFromInstruction(Insn, 0, 2); + return MCDisassembler::Success; +} + +static DecodeStatus +Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + // Try and decode as a 3R instruction. 
+ unsigned Opcode = fieldFromInstruction(Insn, 11, 5); + switch (Opcode) { + case 0x2: + Inst.setOpcode(XCore::ADD_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x3: + Inst.setOpcode(XCore::SUB_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x4: + Inst.setOpcode(XCore::SHL_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x5: + Inst.setOpcode(XCore::SHR_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x6: + Inst.setOpcode(XCore::EQ_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x7: + Inst.setOpcode(XCore::AND_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x8: + Inst.setOpcode(XCore::OR_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x9: + Inst.setOpcode(XCore::LDW_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x10: + Inst.setOpcode(XCore::LD16S_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x11: + Inst.setOpcode(XCore::LD8U_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x18: + Inst.setOpcode(XCore::LSS_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x19: + Inst.setOpcode(XCore::LSU_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + } + return MCDisassembler::Fail; +} + +static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -188,10 +260,11 @@ DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -200,11 +273,12 @@ Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -213,10 +287,11 @@ DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - 
Inst.addOperand(MCOperand::CreateImm(Op2)); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op2)); return S; } @@ -225,10 +300,11 @@ DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeBitpOperand(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeBitpOperand(Inst, Op2, Address, Decoder); return S; } @@ -237,11 +313,12 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeBitpOperand(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeBitpOperand(Inst, Op2, Address, Decoder); return S; } @@ -271,6 +348,19 @@ DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 44ac45c..b3c2093 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -33,8 +33,10 @@ class PseudoInstXCore pattern> // Instruction formats //===----------------------------------------------------------------------===// -class _F3R pattern> +class _F3R opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc; + let DecoderMethod = "Decode3RInstruction"; } class _FL3R pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 95b076f..bb2ef20 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -200,48 +200,40 @@ def InlineJT32 : Operand { // Three operand short -multiclass F3R_2RUS { - def _3r: _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; - def _2rus : _F2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; +multiclass F3R_2RUS opc, string OpcStr, SDNode OpNode> { + def _3r: _F3R; + def _2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set 
GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; } -multiclass F3R_2RUS_np { - def _3r: _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - []>; - def _2rus : _F2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - []>; +multiclass F3R_2RUS_np opc, string OpcStr> { + def _3r: _F3R; + def _2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), []>; } -multiclass F3R_2RBITP { - def _3r: _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +multiclass F3R_2RBITP opc, string OpcStr, SDNode OpNode> { + def _3r: _F3R; def _2rus : _F2RUS< (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), !strconcat(OpcStr, " $dst, $b, $c"), [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; } -class F3R : _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +class F3R opc, string OpcStr, SDNode OpNode> : + _F3R; -class F3R_np : _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - []>; +class F3R_np opc, string OpcStr> : + _F3R; // Three operand long /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. @@ -390,46 +382,44 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// // Three operand short -defm ADD : F3R_2RUS<"add", add>; -defm SUB : F3R_2RUS<"sub", sub>; +defm ADD : F3R_2RUS<0b00010, "add", add>; +defm SUB : F3R_2RUS<0b00011, "sub", sub>; let neverHasSideEffects = 1 in { -defm EQ : F3R_2RUS_np<"eq">; -def LSS_3r : F3R_np<"lss">; -def LSU_3r : F3R_np<"lsu">; +defm EQ : F3R_2RUS_np<0b00110, "eq">; +def LSS_3r : F3R_np<0b11000, "lss">; +def LSU_3r : F3R_np<0b11001, "lsu">; } -def AND_3r : F3R<"and", and>; -def OR_3r : F3R<"or", or>; +def AND_3r : F3R<0b00111, "and", and>; +def OR_3r : F3R<0b01000, "or", or>; let mayLoad=1 in { -def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ldw $dst, $addr[$offset]", - []>; +def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ldw $dst, $addr[$offset]", []>; def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset), "ldw $dst, $addr[$offset]", []>; -def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ld16s $dst, $addr[$offset]", - []>; +def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ld16s $dst, $addr[$offset]", []>; -def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ld8u $dst, $addr[$offset]", - []>; +def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ld8u $dst, $addr[$offset]", []>; } let mayStore=1 in { -def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "stw $val, $addr[$offset]", - []>; +def STW_3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "stw $val, $addr[$offset]", []>; def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), - "stw $val, $addr[$offset]", - []>; + "stw $val, $addr[$offset]", []>; } -defm SHL : F3R_2RBITP<"shl", shl>; -defm SHR : F3R_2RBITP<"shr", srl>; +defm SHL : F3R_2RBITP<0b00100, "shl", shl>; +defm SHR : F3R_2RBITP<0b00101, "shr", srl>; // TODO tsetr // Three operand long -- cgit v1.1 From 
a68c64fbb2f1bee7f9313f3ee19c35677563f974 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 20 Jan 2013 17:22:43 +0000 Subject: Add instruction encodings / disassembler support for 2rus instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172985 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 57 ++++++++++++++++++++++ lib/Target/XCore/XCoreInstrFormats.td | 11 ++++- lib/Target/XCore/XCoreInstrInfo.td | 45 ++++++++--------- 3 files changed, 90 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index baa9566..3e0a16f 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -137,6 +137,16 @@ static DecodeStatus Decode3RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus Decode2RUSInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -202,6 +212,12 @@ Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, // Try and decode as a 3R instruction. unsigned Opcode = fieldFromInstruction(Insn, 11, 5); switch (Opcode) { + case 0x0: + Inst.setOpcode(XCore::STW_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x1: + Inst.setOpcode(XCore::LDW_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); case 0x2: Inst.setOpcode(XCore::ADD_3r); return Decode3RInstruction(Inst, Insn, Address, Decoder); @@ -232,6 +248,21 @@ Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, case 0x11: Inst.setOpcode(XCore::LD8U_3r); return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x12: + Inst.setOpcode(XCore::ADD_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x13: + Inst.setOpcode(XCore::SUB_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x14: + Inst.setOpcode(XCore::SHL_2rus); + return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x15: + Inst.setOpcode(XCore::SHR_2rus); + return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x16: + Inst.setOpcode(XCore::EQ_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); case 0x18: Inst.setOpcode(XCore::LSS_3r); return Decode3RInstruction(Inst, Insn, Address, Decoder); @@ -361,6 +392,32 @@ Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op3)); + } + return S; +} + +static DecodeStatus +Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + 
DecodeBitpOperand(Inst, Op3, Address, Decoder); + } + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index b3c2093..e65d477 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -43,8 +43,17 @@ class _FL3R pattern> : InstXCore<4, outs, ins, asmstr, pattern> { } -class _F2RUS pattern> +class _F2RUS opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc; + let DecoderMethod = "Decode2RUSInstruction"; +} + +// 2RUS with bitp operand +class _F2RUSBitp opc, dag outs, dag ins, string asmstr, + list pattern> + : _F2RUS { + let DecoderMethod = "Decode2RUSBitpInstruction"; } class _FL2RUS pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index bb2ef20..d66279c 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -200,30 +200,30 @@ def InlineJT32 : Operand { // Three operand short -multiclass F3R_2RUS opc, string OpcStr, SDNode OpNode> { - def _3r: _F3R opc1, bits<5> opc2, string OpcStr, SDNode OpNode> { + def _3r: _F3R; - def _2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + def _2rus : _F2RUS; } -multiclass F3R_2RUS_np opc, string OpcStr> { - def _3r: _F3R opc1, bits<5> opc2, string OpcStr> { + def _3r: _F3R; - def _2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + def _2rus : _F2RUS; } -multiclass F3R_2RBITP opc, string OpcStr, SDNode OpNode> { - def _3r: _F3R opc1, bits<5> opc2, string OpcStr, + SDNode OpNode> { + def _3r: _F3R; - def _2rus : _F2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; + def _2rus : _F2RUSBitp; } class F3R opc, string OpcStr, SDNode OpNode> : @@ -382,10 +382,10 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// // Three operand short -defm ADD : F3R_2RUS<0b00010, "add", add>; -defm SUB : F3R_2RUS<0b00011, "sub", sub>; +defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>; +defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>; let neverHasSideEffects = 1 in { -defm EQ : F3R_2RUS_np<0b00110, "eq">; +defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">; def LSS_3r : F3R_np<0b11000, "lss">; def LSU_3r : F3R_np<0b11001, "lsu">; } @@ -397,9 +397,9 @@ def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), "ldw $dst, $addr[$offset]", []>; -def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset), - "ldw $dst, $addr[$offset]", - []>; +def LDW_2rus : _F2RUS<0b00001, (outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldw $dst, $addr[$offset]", []>; def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), @@ -414,12 +414,13 @@ let mayStore=1 in { def STW_3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), "stw $val, $addr[$offset]", []>; -def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), - "stw $val, $addr[$offset]", []>; +def STW_2rus : _F2RUS<0b0000, (outs), + (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), + "stw $val, $addr[$offset]", []>; } -defm SHL : F3R_2RBITP<0b00100, "shl", shl>; -defm SHR : F3R_2RBITP<0b00101, "shr", srl>; +defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>; +defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", 
srl>; // TODO tsetr // Three operand long -- cgit v1.1 From c78ec6b6bc05572aed6af1eee4349d76a68ded18 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 20 Jan 2013 18:37:49 +0000 Subject: Add instruction encodings / disassembly support for l3r instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172986 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 113 +++++++++++++++++++-- lib/Target/XCore/XCoreInstrFormats.td | 14 ++- lib/Target/XCore/XCoreInstrInfo.td | 101 +++++++++--------- 3 files changed, 172 insertions(+), 56 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 3e0a16f..96a985d 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -147,6 +147,16 @@ static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeL3RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -354,15 +364,72 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus +DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + // Try and decode as a L3R instruction. + unsigned Opcode = fieldFromInstruction(Insn, 16, 4) | + fieldFromInstruction(Insn, 27, 5) << 4; + switch (Opcode) { + case 0x0c: + Inst.setOpcode(XCore::STW_3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x1c: + Inst.setOpcode(XCore::XOR_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x2c: + Inst.setOpcode(XCore::ASHR_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x3c: + Inst.setOpcode(XCore::LDAWF_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x4c: + Inst.setOpcode(XCore::LDAWB_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x5c: + Inst.setOpcode(XCore::LDA16F_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x6c: + Inst.setOpcode(XCore::LDA16B_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x7c: + Inst.setOpcode(XCore::MUL_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x8c: + Inst.setOpcode(XCore::DIVS_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x9c: + Inst.setOpcode(XCore::DIVU_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x10c: + Inst.setOpcode(XCore::ST16_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x11c: + Inst.setOpcode(XCore::ST8_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x15c: + Inst.setOpcode(XCore::CRC_l3r); + return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder); + case 0x18c: + Inst.setOpcode(XCore::REMS_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x19c: + Inst.setOpcode(XCore::REMU_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + } + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; 
DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -372,10 +439,11 @@ DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - } + if (S != MCDisassembler::Success) + return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); return S; } @@ -418,6 +486,35 @@ Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index e65d477..817ba49 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -39,8 +39,20 @@ class _F3R opc, dag outs, dag ins, string asmstr, list pattern> let DecoderMethod = "Decode3RInstruction"; } -class _FL3R pattern> +class _FL3R opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{8-4}; + let Inst{26-20} = 0b1111110; + let Inst{19-16} = opc{3-0}; + + let Inst{15-11} = 0b11111; + let DecoderMethod = "DecodeL3RInstruction"; +} + +// L3R with first operand as both a source and a destination. +class _FL3RSrcDst opc, dag outs, dag ins, string asmstr, + list pattern> : _FL3R { + let DecoderMethod = "DecodeL3RSrcDstInstruction"; } class _F2RUS opc, dag outs, dag ins, string asmstr, list pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index d66279c..cb4dc65 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -237,11 +237,10 @@ class F3R_np opc, string OpcStr> : // Three operand long /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. 
-multiclass FL3R_L2RUS { - def _l3r: _FL3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +multiclass FL3R_L2RUS opc, string OpcStr, SDNode OpNode> { + def _l3r: _FL3R; def _l2rus : _FL2RUS< (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), !strconcat(OpcStr, " $dst, $b, $c"), @@ -249,21 +248,20 @@ multiclass FL3R_L2RUS { } /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. -multiclass FL3R_L2RBITP { - def _l3r: _FL3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +multiclass FL3R_L2RBITP opc, string OpcStr, SDNode OpNode> { + def _l3r: _FL3R; def _l2rus : _FL2RUS< (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), !strconcat(OpcStr, " $dst, $b, $c"), [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; } -class FL3R : _FL3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +class FL3R opc, string OpcStr, SDNode OpNode> : + _FL3R; // Register - U6 // Operand register - U6 @@ -411,8 +409,9 @@ def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst), } let mayStore=1 in { -def STW_3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "stw $val, $addr[$offset]", []>; +def STW_3r : _FL3R<0b000001100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "stw $val, $addr[$offset]", []>; def STW_2rus : _F2RUS<0b0000, (outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), @@ -424,9 +423,11 @@ defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>; // TODO tsetr // Three operand long -def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ldaw $dst, $addr[$offset]", - [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>; +def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ldaw $dst, $addr[$offset]", + [(set GRRegs:$dst, + (ldawf GRRegs:$addr, GRRegs:$offset))]>; let neverHasSideEffects = 1 in def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst), @@ -434,9 +435,11 @@ def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst), "ldaw $dst, $addr[$offset]", []>; -def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ldaw $dst, $addr[-$offset]", - [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>; +def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ldaw $dst, $addr[-$offset]", + [(set GRRegs:$dst, + (ldawb GRRegs:$addr, GRRegs:$offset))]>; let neverHasSideEffects = 1 in def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst), @@ -444,42 +447,46 @@ def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst), "ldaw $dst, $addr[-$offset]", []>; -def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "lda16 $dst, $addr[$offset]", - [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>; +def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "lda16 $dst, $addr[$offset]", + [(set GRRegs:$dst, + (lda16f GRRegs:$addr, GRRegs:$offset))]>; -def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "lda16 $dst, $addr[-$offset]", - [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>; +def LDA16B_l3r : _FL3R<0b001101100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "lda16 $dst, $addr[-$offset]", + [(set GRRegs:$dst, + (lda16b GRRegs:$addr, GRRegs:$offset))]>; -def MUL_l3r 
: FL3R<"mul", mul>; +def MUL_l3r : FL3R<0b001111100, "mul", mul>; // Instructions which may trap are marked as side effecting. let hasSideEffects = 1 in { -def DIVS_l3r : FL3R<"divs", sdiv>; -def DIVU_l3r : FL3R<"divu", udiv>; -def REMS_l3r : FL3R<"rems", srem>; -def REMU_l3r : FL3R<"remu", urem>; +def DIVS_l3r : FL3R<0b010001100, "divs", sdiv>; +def DIVU_l3r : FL3R<0b010011100, "divu", udiv>; +def REMS_l3r : FL3R<0b110001100, "rems", srem>; +def REMU_l3r : FL3R<0b110011100, "remu", urem>; } -def XOR_l3r : FL3R<"xor", xor>; -defm ASHR : FL3R_L2RBITP<"ashr", sra>; +def XOR_l3r : FL3R<0b000011100, "xor", xor>; +defm ASHR : FL3R_L2RBITP<0b000101100, "ashr", sra>; let Constraints = "$src1 = $dst" in -def CRC_l3r : _FL3R<(outs GRRegs:$dst), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "crc32 $dst, $src2, $src3", - [(set GRRegs:$dst, - (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2, - GRRegs:$src3))]>; +def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "crc32 $dst, $src2, $src3", + [(set GRRegs:$dst, + (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2, + GRRegs:$src3))]>; // TODO inpw, outpw let mayStore=1 in { -def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "st16 $val, $addr[$offset]", - []>; +def ST16_l3r : _FL3R<0b100001100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "st16 $val, $addr[$offset]", []>; -def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "st8 $val, $addr[$offset]", - []>; +def ST8_l3r : _FL3R<0b100011100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "st8 $val, $addr[$offset]", []>; } // Four operand long -- cgit v1.1 From b853c415c663c752c669cb191cea95542c1d21f6 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 20 Jan 2013 18:51:15 +0000 Subject: Add instruction encodings / disassembly support for l2rus instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172987 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 49 +++++++++++++++++++++- lib/Target/XCore/XCoreInstrFormats.td | 15 ++++++- lib/Target/XCore/XCoreInstrInfo.td | 40 +++++++++--------- 3 files changed, 81 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 96a985d..d24d947 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -157,6 +157,16 @@ static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -366,7 +376,7 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - // Try and decode as a L3R instruction. + // Try and decode as a L3R / L2RUS instruction. 
unsigned Opcode = fieldFromInstruction(Insn, 16, 4) | fieldFromInstruction(Insn, 27, 5) << 4; switch (Opcode) { @@ -406,6 +416,15 @@ DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, case 0x11c: Inst.setOpcode(XCore::ST8_l3r); return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x12c: + Inst.setOpcode(XCore::ASHR_l2rus); + return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x13c: + Inst.setOpcode(XCore::LDAWF_l2rus); + return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x14c: + Inst.setOpcode(XCore::LDAWB_l2rus); + return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder); case 0x15c: Inst.setOpcode(XCore::CRC_l3r); return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder); @@ -515,6 +534,34 @@ DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op3)); + } + return S; +} + +static DecodeStatus +DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeBitpOperand(Inst, Op3, Address, Decoder); + } + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 817ba49..01ed5cb 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -68,8 +68,21 @@ class _F2RUSBitp opc, dag outs, dag ins, string asmstr, let DecoderMethod = "Decode2RUSBitpInstruction"; } -class _FL2RUS pattern> +class _FL2RUS opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{8-4}; + let Inst{26-20} = 0b1111110; + let Inst{19-16} = opc{3-0}; + + let Inst{15-11} = 0b11111; + let DecoderMethod = "DecodeL2RUSInstruction"; +} + +// L2RUS with bitp operand +class _FL2RUSBitp opc, dag outs, dag ins, string asmstr, + list pattern> + : _FL2RUS { + let DecoderMethod = "DecodeL2RUSBitpInstruction"; } class _FRU6 pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index cb4dc65..4018e31 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -237,25 +237,25 @@ class F3R_np opc, string OpcStr> : // Three operand long /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. -multiclass FL3R_L2RUS opc, string OpcStr, SDNode OpNode> { - def _l3r: _FL3R opc1, bits<9> opc2, string OpcStr, + SDNode OpNode> { + def _l3r: _FL3R; - def _l2rus : _FL2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; + def _l2rus : _FL2RUS; } /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. 
-multiclass FL3R_L2RBITP opc, string OpcStr, SDNode OpNode> { - def _l3r: _FL3R opc1, bits<9> opc2, string OpcStr, + SDNode OpNode> { + def _l3r: _FL3R; - def _l2rus : _FL2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; + def _l2rus : _FL2RUSBitp; } class FL3R opc, string OpcStr, SDNode OpNode> : @@ -430,10 +430,9 @@ def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst), (ldawf GRRegs:$addr, GRRegs:$offset))]>; let neverHasSideEffects = 1 in -def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst), - (ins GRRegs:$addr, i32imm:$offset), - "ldaw $dst, $addr[$offset]", - []>; +def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldaw $dst, $addr[$offset]", []>; def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), @@ -442,10 +441,9 @@ def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst), (ldawb GRRegs:$addr, GRRegs:$offset))]>; let neverHasSideEffects = 1 in -def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst), - (ins GRRegs:$addr, i32imm:$offset), - "ldaw $dst, $addr[-$offset]", - []>; +def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldaw $dst, $addr[-$offset]", []>; def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), @@ -468,7 +466,7 @@ def REMS_l3r : FL3R<0b110001100, "rems", srem>; def REMU_l3r : FL3R<0b110011100, "remu", urem>; } def XOR_l3r : FL3R<0b000011100, "xor", xor>; -defm ASHR : FL3R_L2RBITP<0b000101100, "ashr", sra>; +defm ASHR : FL3R_L2RBITP<0b000101100, 0b100101100, "ashr", sra>; let Constraints = "$src1 = $dst" in def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst), -- cgit v1.1 From 5ff7a3f947c245df9ae95a381ef38184527e83e1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 20 Jan 2013 20:29:52 +0000 Subject: LoopVectorize: Fix a C++11 incompatibility. 
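(Background, as a minimal sketch rather than patch content: C++11 changed std::make_pair to deduce forwarding-reference parameters, so naming the template arguments explicitly, as the removed line in the diff below does, forces rvalue-reference parameters that lvalue arguments cannot bind.)

    #include <utility>

    std::pair<unsigned, unsigned> demo(unsigned Width, unsigned LoopCost) {
      // Rejected by C++11 compilers: the explicit template arguments turn
      // make_pair's parameters into unsigned&&, and the lvalues Width and
      // LoopCost cannot bind to rvalue references.
      //   return std::make_pair<unsigned, unsigned>(Width, LoopCost);

      // Accepted in both C++03 and C++11: let argument deduction choose.
      return std::make_pair(Width, LoopCost);
    }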
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172990 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3013c2d..ba8987d 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2714,7 +2714,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
   DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
   unsigned LoopCost = VF * Cost;

-  return std::make_pair<unsigned, unsigned>(Width, LoopCost);
+  return std::make_pair(Width, LoopCost);
 }

 unsigned LoopVectorizationCostModel::getWidestType() {
-- 
cgit v1.1


From 053a2119835ac6ca3484f1b496cabd43c37e4279 Mon Sep 17 00:00:00 2001
From: Renato Golin
Date: Sun, 20 Jan 2013 20:57:20 +0000
Subject: Revert CostTable algorithm, will re-write

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172992 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/TargetTransformInfo.cpp      |  45 --------
 lib/Target/X86/X86TargetTransformInfo.cpp | 168 ++++++++++++++++++------------
 2 files changed, 102 insertions(+), 111 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 4873a7f..3ef74eb 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -286,48 +286,3 @@ char NoTTI::ID = 0;
 ImmutablePass *llvm::createNoTargetTransformInfoPass() {
   return new NoTTI();
 }
-
-//======================================= COST TABLES ==
-
-CostTable::CostTable(const CostTableEntry *table,
-                     const size_t size,
-                     unsigned numTypes)
-  : table(table), size(size), numTypes(numTypes) {
-  assert(table && "missing cost table");
-  assert(size > 0 && "empty cost table");
-}
-
-unsigned CostTable::_findCost(int ISD, MVT *Types) const {
-  for (unsigned i = 0; i < size; ++i) {
-    if (table[i].ISD != ISD)
-      continue;
-    bool found = true;
-    for (unsigned t=0; tInstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");

-  // We don't have to scalarize unsupported ops. We can issue two half-sized
-  // operations and we only need to extract the upper YMM half.
-  // Two ops + 1 extract + 1 insert = 4.
-  static const CostTableEntry AVX1CostTable[] = {
-    { ISD::MUL, { MVT::v8i32 }, 4 },
-    { ISD::SUB, { MVT::v8i32 }, 4 },
-    { ISD::ADD, { MVT::v8i32 }, 4 },
-    { ISD::MUL, { MVT::v4i64 }, 4 },
-    { ISD::SUB, { MVT::v4i64 }, 4 },
-    { ISD::ADD, { MVT::v4i64 }, 4 },
-  };
-  UnaryCostTable costTable (AVX1CostTable, array_lengthof(AVX1CostTable));
+  static const X86CostTblEntry AVX1CostTable[] = {
+    // We don't have to scalarize unsupported ops. We can issue two half-sized
+    // operations and we only need to extract the upper YMM half.
+    // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL, MVT::v8i32, 4 },
+    { ISD::SUB, MVT::v8i32, 4 },
+    { ISD::ADD, MVT::v8i32, 4 },
+    { ISD::MUL, MVT::v4i64, 4 },
+    { ISD::SUB, MVT::v4i64, 4 },
+    { ISD::ADD, MVT::v4i64, 4 },
+  };

   // Look for AVX1 lowering tricks.
   if (ST->hasAVX()) {
-    unsigned cost = costTable.findCost(ISD, LT.second);
-    if (cost != BinaryCostTable::COST_NOT_FOUND)
-      return LT.first * cost;
+    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+                          LT.second);
+    if (Idx != -1)
+      return LT.first * AVX1CostTable[Idx].Cost;
   }

   // Fallback to the default implementation.
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); @@ -216,29 +254,30 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const CostTableEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, - { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i16 }, 1 }, - { ISD::SIGN_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, - { ISD::ZERO_EXTEND, { MVT::v4i64, MVT::v4i32 }, 1 }, - { ISD::TRUNCATE, { MVT::v4i32, MVT::v4i64 }, 1 }, - { ISD::TRUNCATE, { MVT::v8i16, MVT::v8i32 }, 1 }, - { ISD::SINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, - { ISD::SINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, - { ISD::UINT_TO_FP, { MVT::v8f32, MVT::v8i8 }, 1 }, - { ISD::UINT_TO_FP, { MVT::v4f32, MVT::v4i8 }, 1 }, - { ISD::FP_TO_SINT, { MVT::v8i8, MVT::v8f32 }, 1 }, - { ISD::FP_TO_SINT, { MVT::v4i8, MVT::v4f32 }, 1 }, - { ISD::ZERO_EXTEND, { MVT::v8i32, MVT::v8i1 }, 6 }, - { ISD::SIGN_EXTEND, { MVT::v8i32, MVT::v8i1 }, 9 }, - { ISD::TRUNCATE, { MVT::v8i32, MVT::v8i64 }, 3 } + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; - BinaryCostTable costTable (AVXConversionTbl, array_lengthof(AVXConversionTbl)); if (ST->hasAVX()) { - unsigned cost = costTable.findCost(ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return cost; + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); @@ -254,51 +293,48 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const CostTableEntry SSE42CostTbl[] = { - { ISD::SETCC, { MVT::v2f64 }, 1 }, - { ISD::SETCC, { MVT::v4f32 }, 1 }, - { ISD::SETCC, { MVT::v2i64 }, 1 }, - { ISD::SETCC, { MVT::v4i32 }, 1 }, - { ISD::SETCC, { MVT::v8i16 }, 1 }, - { ISD::SETCC, { MVT::v16i8 }, 1 }, + static const X86CostTblEntry SSE42CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + { ISD::SETCC, MVT::v2i64, 1 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, }; - UnaryCostTable costTableSSE4 (SSE42CostTbl, array_lengthof(SSE42CostTbl)); - static const CostTableEntry AVX1CostTbl[] = { - { ISD::SETCC, { MVT::v4f64 }, 1 }, - { ISD::SETCC, { MVT::v8f32 }, 1 }, + static const X86CostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1 }, + { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. 
- { ISD::SETCC, { MVT::v4i64 }, 4 }, - { ISD::SETCC, { MVT::v8i32 }, 4 }, - { ISD::SETCC, { MVT::v16i16 }, 4 }, - { ISD::SETCC, { MVT::v32i8 }, 4 }, + { ISD::SETCC, MVT::v4i64, 4 }, + { ISD::SETCC, MVT::v8i32, 4 }, + { ISD::SETCC, MVT::v16i16, 4 }, + { ISD::SETCC, MVT::v32i8, 4 }, }; - UnaryCostTable costTableAVX1 (AVX1CostTbl, array_lengthof(AVX1CostTbl)); - static const CostTableEntry AVX2CostTbl[] = { - { ISD::SETCC, { MVT::v4i64 }, 1 }, - { ISD::SETCC, { MVT::v8i32 }, 1 }, - { ISD::SETCC, { MVT::v16i16 }, 1 }, - { ISD::SETCC, { MVT::v32i8 }, 1 }, + static const X86CostTblEntry AVX2CostTbl[] = { + { ISD::SETCC, MVT::v4i64, 1 }, + { ISD::SETCC, MVT::v8i32, 1 }, + { ISD::SETCC, MVT::v16i16, 1 }, + { ISD::SETCC, MVT::v32i8, 1 }, }; - UnaryCostTable costTableAVX2 (AVX2CostTbl, array_lengthof(AVX2CostTbl)); if (ST->hasAVX2()) { - unsigned cost = costTableAVX2.findCost(ISD, MTy); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - unsigned cost = costTableAVX1.findCost(ISD, MTy); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - unsigned cost = costTableSSE4.findCost(ISD, MTy); - if (cost != BinaryCostTable::COST_NOT_FOUND) - return LT.first * cost; + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * SSE42CostTbl[Idx].Cost; } return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); -- cgit v1.1 From d713c0f7f1556f1ff74b3e953be5d35b614cc081 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 21:34:37 +0000 Subject: Capitalize lowerTRUNCATE so that it matches the other lower functions in this file despite it not matching coding standards. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8f6005f..6c810bb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8379,7 +8379,7 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi); } -SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); SDValue In = Op.getOperand(0); @@ -12024,7 +12024,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); - case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6856016..69aa980 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -809,7 +809,7 @@ namespace llvm { SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From a080daf5c6d9f62d021efe7c6c608a71d591dc44 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 20 Jan 2013 21:50:27 +0000 Subject: Convert more EVT's to MVT's in the lowering methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 47 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6c810bb..9f7d4a6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8296,9 +8296,9 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { - EVT VT = Op->getValueType(0); + MVT VT = Op->getValueType(0).getSimpleVT(); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getValueType().getSimpleVT(); DebugLoc dl = Op->getDebugLoc(); // Optimize vectors in AVX mode: @@ -8327,7 +8327,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? 
ZeroVec : Undef); - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + MVT HVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); @@ -8349,9 +8349,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); if (Subtarget->hasFp256()) { SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); @@ -8381,9 +8381,9 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. @@ -8498,9 +8498,10 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v8i16) - return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(), + MVT VT = Op.getValueType().getSimpleVT(); + if (VT.isVector()) { + if (VT == MVT::v8i16) + return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT, DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(), MVT::v8i32, Op.getOperand(0))); return SDValue(); @@ -8542,9 +8543,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); @@ -8556,8 +8557,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT EltVT = VT; + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { EltVT = VT.getVectorElementType(); @@ -8588,8 +8589,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT EltVT = VT; + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { EltVT = VT.getVectorElementType(); @@ -8623,8 +8624,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT SrcVT = Op1.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT SrcVT = Op1.getValueType().getSimpleVT(); // If second operand is smaller, extend it first. 
if (SrcVT.bitsLT(VT)) { @@ -8694,7 +8695,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1). SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0, @@ -9499,7 +9500,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && @@ -9610,9 +9611,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op->getValueType(0); + MVT VT = Op->getValueType(0).getSimpleVT(); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getValueType().getSimpleVT(); DebugLoc dl = Op->getDebugLoc(); if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && @@ -9646,7 +9647,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); -- cgit v1.1 From 1e05bd9e714934a71ff933ad15f0b884808b405f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Jan 2013 01:27:39 +0000 Subject: Introduce a generic interface for querying an operation's expected lowered cost. Currently, this is a direct port of the logic implementing isInstructionFree in CodeMetrics. The hope is that the interface can be improved (f.ex. supporting un-formed instruction queries) and the implementation abstracted so that as we have test cases and target knowledge we can expose increasingly accurate heuristics to clients. I'll start switching existing consumers over and kill off the routine in CodeMetrics in subsequent commits. 
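(For illustration only, a sketch of how a client could consume the new interface; the helper function below and its iteration scaffolding are assumptions, while getUserCost and the TCC_* return values are the interface this commit adds.)

    // Approximate a function's lowered size by summing the target's cost
    // estimate for each instruction (TCC_Free, TCC_Basic, etc.).
    static unsigned estimateLoweredCost(const Function &F,
                                        const TargetTransformInfo &TTI) {
      unsigned Cost = 0;
      for (Function::const_iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
        for (BasicBlock::const_iterator I = BB->begin(), IE = BB->end();
             I != IE; ++I)
          Cost += TTI.getUserCost(&*I);  // delegates to getOperationCost et al.
      return Cost;
    }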
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172998 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/TargetTransformInfo.cpp | 123 ++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 3ef74eb..16ee6eb 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -9,6 +9,11 @@ #define DEBUG_TYPE "tti" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -43,6 +48,20 @@ void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); } +unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + return PrevTTI->getOperationCost(Opcode, Ty, OpTy); +} + +unsigned TargetTransformInfo::getGEPCost( + const Value *Ptr, ArrayRef Operands) const { + return PrevTTI->getGEPCost(Ptr, Operands); +} + +unsigned TargetTransformInfo::getUserCost(const User *U) const { + return PrevTTI->getUserCost(U); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return PrevTTI->isLegalAddImmediate(Imm); } @@ -151,7 +170,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { namespace { struct NoTTI : ImmutablePass, TargetTransformInfo { - NoTTI() : ImmutablePass(ID) { + const DataLayout *DL; + + NoTTI() : ImmutablePass(ID), DL(0) { initializeNoTTIPass(*PassRegistry::getPassRegistry()); } @@ -159,6 +180,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { // Note that this subclass is special, and must *not* call initializeTTI as // it does not chain. PrevTTI = 0; + DL = getAnalysisIfAvailable(); } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -176,6 +198,105 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return this; } + unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { + switch (Opcode) { + default: + // By default, just classify everything as 'basic'. + return TCC_Basic; + + case Instruction::GetElementPtr: + llvm_unreachable("Use getGEPCost for GEP operations!"); + + case Instruction::BitCast: + assert(OpTy && "Cast instructions must provide the operand type"); + if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) + // Identity and pointer-to-pointer casts are free. + return TCC_Free; + + // Otherwise, the default basic cost is used. + return TCC_Basic; + + case Instruction::IntToPtr: + // An inttoptr cast is free so long as the input is a legal integer type + // which doesn't contain values outside the range of a pointer. + if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && + OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::PtrToInt: + // A ptrtoint cast is free so long as the result is large enough to store + // the pointer, and a legal integer type. + if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && + OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::Trunc: + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). 
+      if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
+        return TCC_Free;
+
+      return TCC_Basic;
+    }
+  }
+
+  unsigned getGEPCost(const Value *Ptr,
+                      ArrayRef<const Value *> Operands) const {
+    // In the basic model, we just assume that all-constant GEPs will be folded
+    // into their uses via addressing modes.
+    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
+      if (!isa<Constant>(Operands[Idx]))
+        return TCC_Basic;
+
+    return TCC_Free;
+  }
+
+  unsigned getUserCost(const User *U) const {
+    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))
+      // In the basic model we just assume that all-constant GEPs will be
+      // folded into their uses via addressing modes.
+      return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic;
+
+    // If we have a call of an intrinsic we can provide more detailed analysis
+    // by inspecting the particular intrinsic called.
+    // FIXME: Hoist this out into a getIntrinsicCost routine.
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      switch (II->getIntrinsicID()) {
+      default:
+        return TCC_Basic;
+      case Intrinsic::dbg_declare:
+      case Intrinsic::dbg_value:
+      case Intrinsic::invariant_start:
+      case Intrinsic::invariant_end:
+      case Intrinsic::lifetime_start:
+      case Intrinsic::lifetime_end:
+      case Intrinsic::objectsize:
+      case Intrinsic::ptr_annotation:
+      case Intrinsic::var_annotation:
+        // These intrinsics don't count as size.
+        return TCC_Free;
+      }
+    }
+
+    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
+      // Result of a cmp instruction is often extended (to be used by other
+      // cmp instructions, logical or return instructions). These are usually
+      // nop on most sane targets.
+      if (isa<CmpInst>(CI->getOperand(0)))
+        return TCC_Free;
+    }
+
+    // Otherwise delegate to the fully generic implementations.
+    return getOperationCost(Operator::getOpcode(U), U->getType(),
+                            U->getNumOperands() == 1 ?
+                            U->getOperand(0)->getType() : 0);
+  }

   bool isLegalAddImmediate(int64_t Imm) const {
     return false;
-- 
cgit v1.1


From b84b4236343727ab1cd9c1cb4e3e3a43fa69c6c2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 21 Jan 2013 06:13:28 +0000
Subject: Make helper method static.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173005 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 ++--- lib/Target/X86/X86ISelLowering.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9f7d4a6..671f0b4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8540,8 +8540,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } -SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { DebugLoc DL = Op.getDebugLoc(); MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); @@ -12031,7 +12030,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); - case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 69aa980..6d5e8c2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -815,7 +815,6 @@ namespace llvm { SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From b99bafe36d0c63b9febc7c620cde3663f9f7792f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 06:21:54 +0000 Subject: Fix some 80 column violations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173006 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 671f0b4..d222ba1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7508,8 +7508,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, - int64_t Offset, - SelectionDAG &DAG) const { + int64_t Offset, SelectionDAG &DAG) const { // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. 
unsigned char OpFlags = @@ -7729,7 +7728,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), - getTargetMachine().getRelocationModel() == Reloc::PIC_); + getTargetMachine().getRelocationModel() == Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } @@ -8108,7 +8107,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, SVT == MVT::v8i8 || SVT == MVT::v8i16) && "Custom UINT_TO_FP is not supported!"); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements()); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + SVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); } @@ -8201,8 +8201,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } -std::pair X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const { +std::pair +X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool IsSigned, bool IsReplace) const { DebugLoc DL = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -8704,7 +8705,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { // LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able. // -SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree."); if (!Subtarget->hasSSE41()) -- cgit v1.1 From 9b33ef7c67267d1444bf250a54053bd821b773a0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 06:57:59 +0000 Subject: Remove trailing whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d222ba1..ad7e030 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5668,25 +5668,25 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, // Check the mask for BLEND and build the value. unsigned MaskValue = 0; // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. - unsigned NumLanes = (NumElems-1)/8 + 1; + unsigned NumLanes = (NumElems-1)/8 + 1; unsigned NumElemsInLane = NumElems / NumLanes; // Blend for v16i16 should be symetric for the both lanes. for (unsigned i = 0; i < NumElemsInLane; ++i) { - int SndLaneEltIdx = (NumLanes == 2) ? + int SndLaneEltIdx = (NumLanes == 2) ? 
SVOp->getMaskElt(i + NumElemsInLane) : -1; int EltIdx = SVOp->getMaskElt(i); - if ((EltIdx == -1 || EltIdx == (int)i) && + if ((EltIdx == -1 || EltIdx == (int)i) && (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane))) continue; - if (((unsigned)EltIdx == (i + NumElems)) && - (SndLaneEltIdx == -1 || + if (((unsigned)EltIdx == (i + NumElems)) && + (SndLaneEltIdx == -1 || (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane)) MaskValue |= (1<hasInt256())) { - BlendVT = EVT::getVectorVT(*DAG.getContext(), - EVT::getFloatingPointVT(EltVT.getSizeInBits()), + BlendVT = EVT::getVectorVT(*DAG.getContext(), + EVT::getFloatingPointVT(EltVT.getSizeInBits()), NumElems); V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2); } - + SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, DAG.getConstant(MaskValue, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Ret); -- cgit v1.1 From bbf9d3e4936d21c0089d2fa51da7e10910a1fb36 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 07:19:54 +0000 Subject: Use MVT instead of EVT in LowerVECTOR_SHUFFLEtoBlend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ad7e030..e627dd4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5692,17 +5692,16 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, // Convert i32 vectors to floating point if it is not AVX2. // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors. - EVT BlendVT = VT; + MVT BlendVT = VT; if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) { - BlendVT = EVT::getVectorVT(*DAG.getContext(), - EVT::getFloatingPointVT(EltVT.getSizeInBits()), - NumElems); + BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()), + NumElems); V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2); } - SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, - DAG.getConstant(MaskValue, MVT::i32)); + SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, + DAG.getConstant(MaskValue, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Ret); } -- cgit v1.1 From 04f74a149d16ff92722c3c333ab36b130fd8cae7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Jan 2013 07:25:16 +0000 Subject: Use <0 checks in place of ==-1 because it results in simpler code. 
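(In brief, and as a schematic fragment rather than patch content: the sentinels a shuffle mask element can carry are all negative, -1 for undef in particular, so a single sign test covers them and presumably compiles down to a sign-bit check instead of a full compare against -1. This mirrors the change recorded below.)

    int EltIdx = SVOp->getMaskElt(i);    // negative means no constraint (undef)
    if (EltIdx < 0 || EltIdx == (int)i)  // sign test replaces EltIdx == -1
      continue;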
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e627dd4..ea514c6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5678,12 +5678,12 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, SVOp->getMaskElt(i + NumElemsInLane) : -1; int EltIdx = SVOp->getMaskElt(i); - if ((EltIdx == -1 || EltIdx == (int)i) && - (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane))) + if ((EltIdx < 0 || EltIdx == (int)i) && + (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane))) continue; if (((unsigned)EltIdx == (i + NumElems)) && - (SndLaneEltIdx == -1 || + (SndLaneEltIdx < 0 || (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane)) MaskValue |= (1< Date: Mon, 21 Jan 2013 11:39:14 +0000 Subject: Clean up the formatting and doxygen for the simple inliner a bit. No functionality changed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173028 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/InlineSimple.cpp | 47 +++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 9682923..b6a4e37 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -28,24 +28,35 @@ using namespace llvm; namespace { - class SimpleInliner : public Inliner { - InlineCostAnalyzer CA; - public: - SimpleInliner() : Inliner(ID) { - initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); - } - SimpleInliner(int Threshold) : Inliner(ID, Threshold, - /*InsertLifetime*/true) { - initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); - } - static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) { - return CA.getInlineCost(CS, getInlineThreshold(CS)); - } - using llvm::Pass::doInitialization; - virtual bool doInitialization(CallGraph &CG); - }; -} +/// \brief Actaul inliner pass implementation. +/// +/// The common implementation of the inlining logic is shared between this +/// inliner pass and the always inliner pass. The two passes use different cost +/// analyses to determine when to inline. +class SimpleInliner : public Inliner { + InlineCostAnalyzer CA; + +public: + SimpleInliner() : Inliner(ID) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + + SimpleInliner(int Threshold) + : Inliner(ID, Threshold, /*InsertLifetime*/ true) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification, replacement for typeid + + InlineCost getInlineCost(CallSite CS) { + return CA.getInlineCost(CS, getInlineThreshold(CS)); + } + + using llvm::Pass::doInitialization; + virtual bool doInitialization(CallGraph &CG); +}; + +} // end anonymous namespace char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", -- cgit v1.1 From 0378e3916a3d568ee161803d4f0107512e595af8 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Jan 2013 11:39:16 +0000 Subject: Formatting and comment fixes to the always inliner. Formatting fixes brought to you by clang-format. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173029 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/InlineAlways.cpp | 53 ++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 2971803..5937fab 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -30,29 +30,34 @@ using namespace llvm; namespace { - // AlwaysInliner only inlines functions that are mark as "always inline". - class AlwaysInliner : public Inliner { - InlineCostAnalyzer CA; - public: - // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) { - initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); - } - AlwaysInliner(bool InsertLifetime) : Inliner(ID, -2000000000, - InsertLifetime) { - initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); - } - static char ID; // Pass identification, replacement for typeid - virtual InlineCost getInlineCost(CallSite CS); - - using llvm::Pass::doInitialization; - using llvm::Pass::doFinalization; - - virtual bool doFinalization(CallGraph &CG) { - return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true); - } - virtual bool doInitialization(CallGraph &CG); - }; +/// \brief Inliner pass which only handles "always inline" functions. +class AlwaysInliner : public Inliner { + InlineCostAnalyzer CA; + +public: + // Use extremely low threshold. + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) { + initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + } + + AlwaysInliner(bool InsertLifetime) + : Inliner(ID, -2000000000, InsertLifetime) { + initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification, replacement for typeid + + virtual InlineCost getInlineCost(CallSite CS); + + using llvm::Pass::doFinalization; + virtual bool doFinalization(CallGraph &CG) { + return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true); + } + + using llvm::Pass::doInitialization; + virtual bool doInitialization(CallGraph &CG); +}; + } char AlwaysInliner::ID = 0; @@ -95,8 +100,6 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { return InlineCost::getNever(); } -// doInitialization - Initializes the vector of functions that have not -// been annotated with the "always inline" attribute. bool AlwaysInliner::doInitialization(CallGraph &CG) { CA.setDataLayout(getAnalysisIfAvailable()); return false; -- cgit v1.1 From 86953b5795007eaa98838297360a6987e33e92e7 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Jan 2013 11:39:18 +0000 Subject: Make the inline cost a proper analysis pass. This remains essentially a dynamic analysis done on each call to the routine. However, now it can use the standard pass infrastructure to reference other analyses, instead of a silly setter method. This will become more interesting as I teach it about more analysis passes. This updates the two inliner passes to use the inline cost analysis. Doing so highlights how utterly redundant these two passes are. Either we should find a cheaper way to do always inlining, or we should merge the two and just fiddle with the thresholds to get the desired behavior. I'm leaning increasingly toward the latter as it would also remove the Inliner sub-class split. 
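(In outline, a client pass now pulls the cost analysis in through the standard pass machinery instead of a setDataLayout-style setter. A minimal sketch mirroring the diffs below; MyInliner is a stand-in for the updated SimpleInliner and AlwaysInliner, and ICA is the cached InlineCostAnalysis pointer they now carry.)

    void MyInliner::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<InlineCostAnalysis>();     // declare the dependency
      Inliner::getAnalysisUsage(AU);
    }

    bool MyInliner::runOnSCC(CallGraphSCC &SCC) {
      ICA = &getAnalysis<InlineCostAnalysis>(); // cache for getInlineCost()
      return Inliner::runOnSCC(SCC);
    }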
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173030 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InlineCost.cpp | 32 +++++++++++++++++++++++++++----- lib/Transforms/IPO/InlineAlways.cpp | 26 ++++++++++++++++---------- lib/Transforms/IPO/InlineSimple.cpp | 25 ++++++++++++++----------- 3 files changed, 57 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 6e5c035..3292ebe 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -1132,11 +1132,32 @@ void CallAnalyzer::dump() { } #endif -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { +INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) +INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) + +char InlineCostAnalysis::ID = 0; + +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} + +InlineCostAnalysis::~InlineCostAnalysis() {} + +void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { + TD = getAnalysisIfAvailable(); + return false; +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, int Threshold) { // Cannot inline indirect calls. if (!Callee) @@ -1177,9 +1198,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } -bool InlineCostAnalyzer::isInlineViable(Function &F) { - bool ReturnsTwice =F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); +bool InlineCostAnalysis::isInlineViable(Function &F) { + bool ReturnsTwice = + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain an indirect branch. if (isa(BI->getTerminator())) diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 5937fab..a0095da 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -32,16 +32,16 @@ namespace { /// \brief Inliner pass which only handles "always inline" functions. class AlwaysInliner : public Inliner { - InlineCostAnalyzer CA; + InlineCostAnalysis *ICA; public: // Use extremely low threshold. 
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime) { + : Inliner(ID, -2000000000, InsertLifetime), ICA(0) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -49,13 +49,13 @@ public: virtual InlineCost getInlineCost(CallSite CS); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnSCC(CallGraphSCC &SCC); + using llvm::Pass::doFinalization; virtual bool doFinalization(CallGraph &CG) { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true); } - - using llvm::Pass::doInitialization; - virtual bool doInitialization(CallGraph &CG); }; } @@ -64,6 +64,7 @@ char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) @@ -94,13 +95,18 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { if (Callee && !Callee->isDeclaration() && Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::AlwaysInline) && - CA.isInlineViable(*Callee)) + ICA->isInlineViable(*Callee)) return InlineCost::getAlways(); return InlineCost::getNever(); } -bool AlwaysInliner::doInitialization(CallGraph &CG) { - CA.setDataLayout(getAnalysisIfAvailable()); - return false; +bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) { + ICA = &getAnalysis(); + return Inliner::runOnSCC(SCC); +} + +void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + Inliner::getAnalysisUsage(AU); } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index b6a4e37..a4f7026 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -34,26 +34,26 @@ namespace { /// inliner pass and the always inliner pass. The two passes use different cost /// analyses to determine when to inline. 
class SimpleInliner : public Inliner { - InlineCostAnalyzer CA; + InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID) { + SimpleInliner() : Inliner(ID), ICA(0) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { - return CA.getInlineCost(CS, getInlineThreshold(CS)); + return ICA->getInlineCost(CS, getInlineThreshold(CS)); } - using llvm::Pass::doInitialization; - virtual bool doInitialization(CallGraph &CG); + virtual bool runOnSCC(CallGraphSCC &SCC); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; }; } // end anonymous namespace @@ -62,6 +62,7 @@ char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining", false, false) @@ -71,10 +72,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) { return new SimpleInliner(Threshold); } -// doInitialization - Initializes the vector of functions that have been -// annotated with the noinline attribute. -bool SimpleInliner::doInitialization(CallGraph &CG) { - CA.setDataLayout(getAnalysisIfAvailable()); - return false; +bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { + ICA = &getAnalysis(); + return Inliner::runOnSCC(SCC); } +void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + Inliner::getAnalysisUsage(AU); +} -- cgit v1.1 From 8d6c0f4deeb0f2ff671df7ae92b75ee1e39acd37 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Jan 2013 11:55:09 +0000 Subject: Now that the inline cost analysis is a pass, we can easily have it depend on and use other analyses (as long as they're either immutable passes or CGSCC passes of course -- nothing in the pass manager has been fixed here). Leverage this to thread TargetTransformInfo down through the inline cost analysis. No functionality changed here, this just threads things through. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173031 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InlineCost.cpp | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 3292ebe..5c4e702 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -44,6 +45,9 @@ class CallAnalyzer : public InstVisitor { // DataLayout if available, or null. const DataLayout *const TD; + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + // The called function. 
Function &F; @@ -130,16 +134,17 @@ class CallAnalyzer : public InstVisitor { bool visitCallSite(CallSite CS); public: - CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) - : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { - } + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -764,7 +769,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -1134,6 +1139,7 @@ void CallAnalyzer::dump() { INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", true, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", true, true) @@ -1145,11 +1151,13 @@ InlineCostAnalysis::~InlineCostAnalysis() {} void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); CallGraphSCCPass::getAnalysisUsage(AU); } bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { TD = getAnalysisIfAvailable(); + TTI = &getAnalysis(); return false; } @@ -1184,7 +1192,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TD, *Callee, Threshold); + CallAnalyzer CA(TD, *TTI, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); -- cgit v1.1 From b5da8a4ae1fbd8e4ffab06cfeb5b32a94d0381bb Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Jan 2013 12:05:16 +0000 Subject: Move the inline cost analysis's primary cost query to TTI instead of the old CodeMetrics system. TTI has the specific advantage of being extensible and customizable by targets to reflect target-specific cost metrics. 
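(The replacement query pattern, shown once here for orientation; it recurs throughout the diff below, where TTI is the TargetTransformInfo reference CallAnalyzer now carries.)

    // Treat an instruction as free for inlining purposes only when the
    // target's cost model says its expected lowered cost is TCC_Free.
    if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
      return true;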
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173032 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/InlineCost.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 5c4e702..cd211c4 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -422,7 +422,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
     SROAArgValues[&I] = SROAArg;

-  return isInstructionFree(&I, TD);
+  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 }

 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
@@ -452,7 +452,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
     SROAArgValues[&I] = SROAArg;

-  return isInstructionFree(&I, TD);
+  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 }

 bool CallAnalyzer::visitCastInst(CastInst &I) {
@@ -469,7 +469,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
   disableSROA(I.getOperand(0));

-  return isInstructionFree(&I, TD);
+  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 }

 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
@@ -782,7 +782,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
 bool CallAnalyzer::visitInstruction(Instruction &I) {
   // Some instructions are free. All of the free intrinsics can also be
   // handled by SROA, etc.
-  if (isInstructionFree(&I, TD))
+  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
     return true;

   // We found something we don't understand or can't handle. Mark any SROA-able
-- cgit v1.1

From 184e3ff52c7c1b279439c4a4a80c1d46a5a7293d Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 21 Jan 2013 12:09:41 +0000
Subject: Sink InlineCost.cpp into IPA -- it is now officially an
 interprocedural analysis. How cute that it wasn't previously. ;]

Part of this confusion stems from the flattened header file tree. Thanks
to Benjamin for pointing out the goof on IRC, and we're considering
un-flattening the headers, so speak now if that would bug you.
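For reference, the CGSCC wiring that both SimpleInliner and InlineCostAnalysis now follow can be condensed to the sketch below (UsesInlineCost is a hypothetical pass invented for illustration; the pass-registration macros are omitted, and the header paths reflect the flattened tree this message mentions):

    #include "llvm/CallGraphSCCPass.h"
    #include "llvm/Analysis/InlineCost.h"
    using namespace llvm;

    namespace {
    struct UsesInlineCost : public CallGraphSCCPass {
      static char ID;
      InlineCostAnalysis *ICA;
      UsesInlineCost() : CallGraphSCCPass(ID), ICA(0) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<InlineCostAnalysis>();   // schedule the analysis first
        CallGraphSCCPass::getAnalysisUsage(AU); // keep base-class requirements
      }

      virtual bool runOnSCC(CallGraphSCC &SCC) {
        ICA = &getAnalysis<InlineCostAnalysis>(); // valid during this SCC visit
        return false;                             // the IR is not modified
      }
    };
    }
    char UsesInlineCost::ID = 0;

Requiring the analysis rather than owning an InlineCostAnalyzer member is what lets the pass manager hand InlineCostAnalysis its own dependencies, such as the TargetTransformInfo group used above.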
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173033 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/CMakeLists.txt | 1 - lib/Analysis/IPA/CMakeLists.txt | 1 + lib/Analysis/IPA/InlineCost.cpp | 1237 +++++++++++++++++++++++++++++++++++++++ lib/Analysis/InlineCost.cpp | 1237 --------------------------------------- 4 files changed, 1238 insertions(+), 1238 deletions(-) create mode 100644 lib/Analysis/IPA/InlineCost.cpp delete mode 100644 lib/Analysis/InlineCost.cpp (limited to 'lib') diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 78abe0f..4c64c4a 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -18,7 +18,6 @@ add_llvm_library(LLVMAnalysis DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp - InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Interval.cpp diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 318119b..67b4135 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMipa FindUsedTypes.cpp GlobalsModRef.cpp IPA.cpp + InlineCost.cpp ) add_dependencies(LLVMipa intrinsics_gen) diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp new file mode 100644 index 0000000..cd211c4 --- /dev/null +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -0,0 +1,1237 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline-cost" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); + +namespace { + +class CallAnalyzer : public InstVisitor { + typedef InstVisitor Base; + friend class InstVisitor; + + // DataLayout if available, or null. + const DataLayout *const TD; + + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + + // The called function. + Function &F; + + int Threshold; + int Cost; + + bool IsCallerRecursive; + bool IsRecursiveCall; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. 
The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap > ConstantOffsetPtrs; + + // Custom simplification helper routines. + bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap::iterator &CostIt); + void disableSROA(DenseMap::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. + void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. + bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitICmp(ICmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); + bool visitCallSite(CallSite CS); + +public: + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} + + bool analyzeCall(CallSite CS); + + int getThreshold() { return Threshold; } + int getCost() { return Cost; } + + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. 
+ unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; + + void dump(); +}; + +} // namespace + +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); +} + +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; + + DenseMap::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; + + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); +} + +/// \brief Disable SROA for the candidate marked by this cost iterator. +/// +/// This marks the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} + +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. +void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} + +/// \brief Accumulate the given cost for a particular SROA candidate. +void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); + return true; + } + + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa(*I) && !SimplifiedValues.lookup(*I)) + return false; + + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. 
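+/// For example, a struct field access such as
+///   getelementptr inbounds { i32, i32 }* %p, i32 0, i32 1
+/// contributes the layout offset of field 1 (4 bytes here) to Offset.
+/// (Illustrative IR; the actual offset comes from TD->getStructLayout.)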
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast(SimpleOp); + if (!OpC) + return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. + + // Accumulate the allocated size. + if (I.isStaticAlloca()) { + Type *Ty = I.getAllocatedType(); + AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + Ty->getPrimitiveSizeInBits()); + } + + // We will happily inline static alloca instructions. + if (I.isStaticAlloca()) + return Base::visitAlloca(I); + + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; +} + +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. + + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. + + // Phi nodes are always zero-cost. + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. + Value *Ptr = I.getPointerOperand(); + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; + } + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also handle SROA candidates here, we already know that the GEP is + // all-constant indexed. + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + return true; + } + } + + if (isGEPOffsetConstant(I)) { + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + // Constant GEPs are modeled as free. 
+ return true; + } + + // Variable GEPs will require math and will disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; +} + +bool CallAnalyzer::visitBitCast(BitCastInst &I) { + // Propagate constants through bitcasts. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offsets through casts + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + // Casts don't change the offset, just wrap it up. + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also look for SROA candidates here. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // Bitcasts are always zero cost. + return true; +} + +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when converted to a plain integer provided the + // integer is large enough to represent the pointer. + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // This is really weird. Technically, ptrtoint will disable SROA. However, + // unless that ptrtoint is *used* somewhere in the live basic blocks after + // inlining, it will be nuked, and SROA should proceed. All of the uses which + // would block SROA would also block SROA if applied directly to a pointer, + // and so we can just add the integer in here. The only places where SROA is + // preserved either cannot fire on an integer, or won't in-and-of themselves + // disable SROA (ext) w/o some later use that we would see and disable. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when round-tripped through a pointer without + // modifications provided the integer is not too large. + Value *Op = I.getOperand(0); + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // "Propagate" SROA here in the same manner as we do for ptrtoint above. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitCastInst(CastInst &I) { + // Propagate constants through ptrtoint. 
+ Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + disableSROA(I.getOperand(0)); + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { + Value *Operand = I.getOperand(0); + Constant *Ops[1] = { dyn_cast(Operand) }; + if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), + Ops, TD)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on the argument to arbitrary unary operators. + disableSROA(Operand); + + return false; +} + +bool CallAnalyzer::visitICmp(ICmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + // First try to handle simplified comparisons. + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CRHS = dyn_cast(RHS)) + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + + // Otherwise look for a comparison between constant offset pointers with + // a common base. + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the icmp to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrCmps; + return true; + } + } + } + + // If the comparison is an equality comparison with null, we can simplify it + // for any alloca-derived argument. + if (I.isEquality() && isa(I.getOperand(1))) + if (isAllocaDerivedArg(I.getOperand(0))) { + // We can actually predict the result of comparisons between an + // alloca-derived value and null. Note that this fires regardless of + // SROA firing. + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; + SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) + : ConstantInt::getFalse(I.getType()); + return true; + } + + // Finally check for SROA candidates in comparisons. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (isa(I.getOperand(1))) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitSub(BinaryOperator &I) { + // Try to handle a special case: we can fold computing the difference of two + // constant-related pointers. 
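+  // For example, once ptrtoint(&A[14]) and ptrtoint(&A[3]) both map to the
+  // same base pointer with known offsets, their difference folds to the
+  // constant 11 * sizeof(element). (Illustrative values.)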
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the subtract to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrDiffs; + return true; + } + } + } + + // Otherwise, fall back to the generic logic for simplifying and handling + // instructions. + return Base::visitSub(I); +} + +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + if (Constant *C = dyn_cast_or_null(SimpleV)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. + disableSROA(LHS); + disableSROA(RHS); + + return false; +} + +bool CallAnalyzer::visitLoad(LoadInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitStore(StoreInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. + Constant *C = dyn_cast(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. 
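+/// For example, if an argument is known here to be the constant -1.0, a
+/// callee-side call to fabs on it constant-folds to 1.0 and is treated as
+/// free. (Illustrative; the folding itself is done by ConstantFoldCall below.)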
+bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast(*I); + if (!C) + C = dyn_cast_or_null(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + +bool CallAnalyzer::visitCallSite(CallSite CS) { + if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && + !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice)) { + // This aborts the entire analysis. + ExposesReturnsTwice = true; + return false; + } + if (CS.isCall() && + cast(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; + + if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; + + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } + } + + if (F == CS.getInstruction()->getParent()->getParent()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; + } + + if (!callIsSmall(CS)) { + // We account for the average 1 instruction per call argument setup + // here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Everything other than inline ASM will also have a significant cost + // merely from making the call. + if (!isa(CS.getCalledValue())) + Cost += InlineConstants::CallPenalty; + } + + return Base::visitCallSite(CS); + } + + // Otherwise we're in a very special case -- an indirect function call. See + // if we can be particularly clever about this. + Value *Callee = CS.getCalledValue(); + + // First, pay the price of the argument setup. We account for the average + // 1 instruction per call argument setup here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Next, check if this happens to be an indirect function call to a known + // function in this inline context. If not, we've done all we can. + Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); + if (!F) + return Base::visitCallSite(CS); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan + // out. Pretend to inline the function, with a custom threshold. 
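+  // For example, with IndirectCallThreshold == 100 and a pretend-inline cost
+  // of 30, the code below subtracts max(0, 100 - 30) == 70 from Cost.
+  // (Illustrative numbers.)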
+ CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); + if (CA.analyzeCall(CS)) { + // We were able to inline the indirect call! Subtract the cost from the + // bonus we want to apply, but don't go below zero. + Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + } + + return Base::visitCallSite(CS); +} + +bool CallAnalyzer::visitInstruction(Instruction &I) { + // Some instructions are free. All of the free intrinsics can also be + // handled by SROA, etc. + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) + return true; + + // We found something we don't understand or can't handle. Mark any SROA-able + // values in the operand list as no longer viable. + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) + disableSROA(*OI); + + return false; +} + + +/// \brief Analyze a basic block for its contribution to the inline cost. +/// +/// This method walks the analyzer over every instruction in the given basic +/// block and accounts for their cost during inlining at this callsite. It +/// aborts early if the threshold has been exceeded or an impossible to inline +/// construct has been detected. It returns false if inlining is no longer +/// viable, and true if inlining remains viable. +bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { + for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); + I != E; ++I) { + ++NumInstructions; + if (isa(I) || I->getType()->isVectorTy()) + ++NumVectorInstructions; + + // If the instruction simplified to a constant, there is no cost to this + // instruction. Visit the instructions using our InstVisitor to account for + // all of the per-instruction logic. The visit tree returns true if we + // consumed the instruction in any way, and false if the instruction's base + // cost should count against inlining. + if (Base::visit(I)) + ++NumInstructionsSimplified; + else + Cost += InlineConstants::InstrCost; + + // If the visit this instruction detected an uninlinable pattern, abort. + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + if (NumVectorInstructions > NumInstructions/2) + VectorBonus = FiftyPercentVectorBonus; + else if (NumVectorInstructions > NumInstructions/10) + VectorBonus = TenPercentVectorBonus; + else + VectorBonus = 0; + + // Check if we've past the threshold so we don't spin in huge basic + // blocks that will never inline. + if (Cost > (Threshold + VectorBonus)) + return false; + } + + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. 
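+/// For example, starting from
+///   %p = getelementptr inbounds i8* %base, i64 4
+///   %q = getelementptr inbounds i8* %p, i64 8
+/// a call with V == %q rewrites V to %base and returns i64 12.
+/// (Illustrative IR, assuming 64-bit pointers.)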
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { + if (!TD || !V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) + return 0; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + return cast(ConstantInt::get(IntPtrTy, Offset)); +} + +/// \brief Analyze a call site for potential inlining. +/// +/// Returns true if inlining this call is viable, and false if it is not +/// viable. It computes the cost and adjusts the threshold based on numerous +/// factors and heuristics. If this method returns false but the computed cost +/// is below the computed threshold, then inlining was forcibly disabled by +/// some artifact of the routine. +bool CallAnalyzer::analyzeCall(CallSite CS) { + ++NumCallsAnalyzed; + + // Track whether the post-inlining function would have more than one basic + // block. A single basic block is often intended for inlining. Balloon the + // threshold by 50% until we pass the single-BB phase. + bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + Threshold += SingleBBBonus; + + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; + + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; + } + } + + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. 
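+  // (Inlining the sole call to an internal function typically lets the
+  // original body be deleted afterwards, refunding most of the size cost;
+  // that is what the bonus below anticipates.)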
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CS.getCalledFunction(); + if (OnlyOneCallAndLocalLinkage) + Cost += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this unless there is literally zero + // cost. + Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast(Instr)) { + if (isa(II->getNormalDest()->begin())) + Threshold = 1; + } else if (isa(++BasicBlock::iterator(Instr))) + Threshold = 1; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; + + // Check if we're done. This can happen due to bonuses and penalties. + if (Cost > Threshold) + return false; + + if (F.empty()) + return true; + + Function *Caller = CS.getInstruction()->getParent()->getParent(); + // Check if the caller function is recursive itself. + for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); + U != E; ++U) { + CallSite Site(cast(*U)); + if (!Site) + continue; + Instruction *I = Site.getInstruction(); + if (I->getParent()->getParent() == Caller) { + IsCallerRecursive = true; + break; + } + } + + // Track whether we've seen a return instruction. The first return + // instruction is free, as at least one will usually disappear in inlining. + bool HasReturn = false; + + // Populate our simplified values by mapping from function arguments to call + // arguments with known important simplifications. + CallSite::arg_iterator CAI = CS.arg_begin(); + for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); + FAI != FAE; ++FAI, ++CAI) { + assert(CAI != CS.arg_end()); + if (Constant *C = dyn_cast(CAI)) + SimplifiedValues[FAI] = C; + + Value *PtrArg = *CAI; + if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { + ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + + // We can SROA any pointer arguments derived from alloca instructions. + if (isa(PtrArg)) { + SROAArgValues[FAI] = PtrArg; + SROAArgCosts[PtrArg] = 0; + } + } + } + NumConstantArgs = SimplifiedValues.size(); + NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); + NumAllocaArgs = SROAArgValues.size(); + + // The worklist of live basic blocks in the callee *after* inlining. We avoid + // adding basic blocks of the callee which can be proven to be dead for this + // particular call site in order to get more accurate cost estimates. This + // requires a somewhat heavyweight iteration pattern: we need to walk the + // basic blocks in a breadth-first order as we insert live successors. To + // accomplish this, prioritizing for small iterations because we exit after + // crossing our threshold, we use a small-size optimized SetVector. + typedef SetVector, + SmallPtrSet > BBSetVector; + BBSetVector BBWorklist; + BBWorklist.insert(&F.getEntryBlock()); + // Note that we *must not* cache the size, this loop grows the worklist. + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { + // Bail out the moment we cross the threshold. This means we'll under-count + // the cost, but only when undercounting doesn't matter. + if (Cost > (Threshold + VectorBonus)) + break; + + BasicBlock *BB = BBWorklist[Idx]; + if (BB->empty()) + continue; + + // Handle the terminator cost here where we can track returns and other + // function-wide constructs. 
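+    // (Only the first return instruction is free; any other terminator is
+    // charged InlineConstants::InstrCost just below.)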
+ TerminatorInst *TI = BB->getTerminator(); + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. + if (isa(TI)) + return false; + + if (!HasReturn && isa(TI)) + HasReturn = true; + else + Cost += InlineConstants::InstrCost; + + // Analyze the cost of this block. If we blow through the threshold, this + // returns false, and we can bail on out. + if (!analyzeBlock(BB)) { + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + break; + } + + // Add in the live successors by first checking whether we have terminator + // that may be simplified based on the values simplified by this call. + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional()) { + Value *Cond = BI->getCondition(); + if (ConstantInt *SimpleCond + = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { + BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); + continue; + } + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + Value *Cond = SI->getCondition(); + if (ConstantInt *SimpleCond + = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { + BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); + continue; + } + } + + // If we're unable to select a particular successor, just count all of + // them. + for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; + ++TIdx) + BBWorklist.insert(TI->getSuccessor(TIdx)); + + // If we had any successors at this point, than post-inlining is likely to + // have them as well. Note that we assume any basic blocks which existed + // due to branches or switches which folded above will also fold after + // inlining. + if (SingleBB && TI->getNumSuccessors() > 1) { + // Take off the bonus we applied to the threshold. + Threshold -= SingleBBBonus; + SingleBB = false; + } + } + + // If this is a noduplicate call, we can still inline as long as + // inlining this would cause the removal of the caller (so the instruction + // is not actually duplicated, just moved). + if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) + return false; + + Threshold += VectorBonus; + + return Cost < Threshold; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// \brief Dump stats about this call's analysis. 
+void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" + DEBUG_PRINT_STAT(NumConstantArgs); + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); + DEBUG_PRINT_STAT(NumAllocaArgs); + DEBUG_PRINT_STAT(NumConstantPtrCmps); + DEBUG_PRINT_STAT(NumConstantPtrDiffs); + DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); +#undef DEBUG_PRINT_STAT +} +#endif + +INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) + +char InlineCostAnalysis::ID = 0; + +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} + +InlineCostAnalysis::~InlineCostAnalysis() {} + +void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { + TD = getAnalysisIfAvailable(); + TTI = &getAnalysis(); + return false; +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold); +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, + int Threshold) { + // Cannot inline indirect calls. + if (!Callee) + return llvm::InlineCost::getNever(); + + // Calls to functions with always-inline attributes should be inlined + // whenever possible. + if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)) { + if (isInlineViable(*Callee)) + return llvm::InlineCost::getAlways(); + return llvm::InlineCost::getNever(); + } + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. + if (Callee->mayBeOverridden() || + Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoInline) || + CS.isNoInline()) + return llvm::InlineCost::getNever(); + + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() + << "...\n"); + + CallAnalyzer CA(TD, *TTI, *Callee, Threshold); + bool ShouldInline = CA.analyzeCall(CS); + + DEBUG(CA.dump()); + + // Check if there was a reason to force inlining or no inlining. + if (!ShouldInline && CA.getCost() < CA.getThreshold()) + return InlineCost::getNever(); + if (ShouldInline && CA.getCost() >= CA.getThreshold()) + return InlineCost::getAlways(); + + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); +} + +bool InlineCostAnalysis::isInlineViable(Function &F) { + bool ReturnsTwice = + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + // Disallow inlining of functions which contain an indirect branch. + if (isa(BI->getTerminator())) + return false; + + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + CallSite CS(II); + if (!CS) + continue; + + // Disallow recursive calls. + if (&F == CS.getCalledFunction()) + return false; + + // Disallow calls which expose returns-twice to a function not previously + // attributed as such. 
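+      // (For example, the candidate's body calls setjmp but the candidate
+      // is not itself marked returns_twice.)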
+ if (!ReturnsTwice && CS.isCall() && + cast(CS.getInstruction())->canReturnTwice()) + return false; + } + } + + return true; +} diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp deleted file mode 100644 index cd211c4..0000000 --- a/lib/Analysis/InlineCost.cpp +++ /dev/null @@ -1,1237 +0,0 @@ -//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements inline cost analysis. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "inline-cost" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" -#include "llvm/InstVisitor.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); - -namespace { - -class CallAnalyzer : public InstVisitor { - typedef InstVisitor Base; - friend class InstVisitor; - - // DataLayout if available, or null. - const DataLayout *const TD; - - /// The TargetTransformInfo available for this compilation. - const TargetTransformInfo &TTI; - - // The called function. - Function &F; - - int Threshold; - int Cost; - - bool IsCallerRecursive; - bool IsRecursiveCall; - bool ExposesReturnsTwice; - bool HasDynamicAlloca; - bool ContainsNoDuplicateCall; - - /// Number of bytes allocated statically by the callee. - uint64_t AllocatedSize; - unsigned NumInstructions, NumVectorInstructions; - int FiftyPercentVectorBonus, TenPercentVectorBonus; - int VectorBonus; - - // While we walk the potentially-inlined instructions, we build up and - // maintain a mapping of simplified values specific to this callsite. The - // idea is to propagate any special information we have about arguments to - // this call through the inlinable section of the function, and account for - // likely simplifications post-inlining. The most important aspect we track - // is CFG altering simplifications -- when we prove a basic block dead, that - // can cause dramatic shifts in the cost of inlining a function. - DenseMap SimplifiedValues; - - // Keep track of the values which map back (through function arguments) to - // allocas on the caller stack which could be simplified through SROA. - DenseMap SROAArgValues; - - // The mapping of caller Alloca values to their accumulated cost savings. If - // we have to disable SROA for one of the allocas, this tells us how much - // cost must be added. - DenseMap SROAArgCosts; - - // Keep track of values which map to a pointer base and constant offset. - DenseMap > ConstantOffsetPtrs; - - // Custom simplification helper routines. 
- bool isAllocaDerivedArg(Value *V); - bool lookupSROAArgAndCost(Value *V, Value *&Arg, - DenseMap::iterator &CostIt); - void disableSROA(DenseMap::iterator CostIt); - void disableSROA(Value *V); - void accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost); - bool handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost); - bool isGEPOffsetConstant(GetElementPtrInst &GEP); - bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); - bool simplifyCallSite(Function *F, CallSite CS); - ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); - - // Custom analysis routines. - bool analyzeBlock(BasicBlock *BB); - - // Disable several entry points to the visitor so we don't accidentally use - // them by declaring but not defining them here. - void visit(Module *); void visit(Module &); - void visit(Function *); void visit(Function &); - void visit(BasicBlock *); void visit(BasicBlock &); - - // Provide base case for our instruction visit. - bool visitInstruction(Instruction &I); - - // Our visit overrides. - bool visitAlloca(AllocaInst &I); - bool visitPHI(PHINode &I); - bool visitGetElementPtr(GetElementPtrInst &I); - bool visitBitCast(BitCastInst &I); - bool visitPtrToInt(PtrToIntInst &I); - bool visitIntToPtr(IntToPtrInst &I); - bool visitCastInst(CastInst &I); - bool visitUnaryInstruction(UnaryInstruction &I); - bool visitICmp(ICmpInst &I); - bool visitSub(BinaryOperator &I); - bool visitBinaryOperator(BinaryOperator &I); - bool visitLoad(LoadInst &I); - bool visitStore(StoreInst &I); - bool visitExtractValue(ExtractValueInst &I); - bool visitInsertValue(InsertValueInst &I); - bool visitCallSite(CallSite CS); - -public: - CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, - Function &Callee, int Threshold) - : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} - - bool analyzeCall(CallSite CS); - - int getThreshold() { return Threshold; } - int getCost() { return Cost; } - - // Keep a bunch of stats about the cost savings found so we can print them - // out when debugging. - unsigned NumConstantArgs; - unsigned NumConstantOffsetPtrArgs; - unsigned NumAllocaArgs; - unsigned NumConstantPtrCmps; - unsigned NumConstantPtrDiffs; - unsigned NumInstructionsSimplified; - unsigned SROACostSavings; - unsigned SROACostSavingsLost; - - void dump(); -}; - -} // namespace - -/// \brief Test whether the given value is an Alloca-derived function argument. -bool CallAnalyzer::isAllocaDerivedArg(Value *V) { - return SROAArgValues.count(V); -} - -/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. -/// Returns false if V does not map to a SROA-candidate. 
-bool CallAnalyzer::lookupSROAArgAndCost( - Value *V, Value *&Arg, DenseMap::iterator &CostIt) { - if (SROAArgValues.empty() || SROAArgCosts.empty()) - return false; - - DenseMap::iterator ArgIt = SROAArgValues.find(V); - if (ArgIt == SROAArgValues.end()) - return false; - - Arg = ArgIt->second; - CostIt = SROAArgCosts.find(Arg); - return CostIt != SROAArgCosts.end(); -} - -/// \brief Disable SROA for the candidate marked by this cost iterator. -/// -/// This marks the candidate as no longer viable for SROA, and adds the cost -/// savings associated with it back into the inline cost measurement. -void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { - // If we're no longer able to perform SROA we need to undo its cost savings - // and prevent subsequent analysis. - Cost += CostIt->second; - SROACostSavings -= CostIt->second; - SROACostSavingsLost += CostIt->second; - SROAArgCosts.erase(CostIt); -} - -/// \brief If 'V' maps to a SROA candidate, disable SROA for it. -void CallAnalyzer::disableSROA(Value *V) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(V, SROAArg, CostIt)) - disableSROA(CostIt); -} - -/// \brief Accumulate the given cost for a particular SROA candidate. -void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost) { - CostIt->second += InstructionCost; - SROACostSavings += InstructionCost; -} - -/// \brief Helper for the common pattern of handling a SROA candidate. -/// Either accumulates the cost savings if the SROA remains valid, or disables -/// SROA for the candidate. -bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost) { - if (IsSROAValid) { - accumulateSROACost(CostIt, InstructionCost); - return true; - } - - disableSROA(CostIt); - return false; -} - -/// \brief Check whether a GEP's indices are all constant. -/// -/// Respects any simplified values known during the analysis of this callsite. -bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { - for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) - if (!isa(*I) && !SimplifiedValues.lookup(*I)) - return false; - - return true; -} - -/// \brief Accumulate a constant GEP offset into an APInt if possible. -/// -/// Returns false if unable to compute the offset for any reason. Respects any -/// simplified values known during the analysis of this callsite. -bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - if (!TD) - return false; - - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - assert(IntPtrWidth == Offset.getBitWidth()); - - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast(GTI.getOperand()); - if (!OpC) - if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) - OpC = dyn_cast(SimpleOp); - if (!OpC) - return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. 
- if (StructType *STy = dyn_cast(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD->getStructLayout(STy); - Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); - Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; - } - return true; -} - -bool CallAnalyzer::visitAlloca(AllocaInst &I) { - // FIXME: Check whether inlining will turn a dynamic alloca into a static - // alloca, and handle that case. - - // Accumulate the allocated size. - if (I.isStaticAlloca()) { - Type *Ty = I.getAllocatedType(); - AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : - Ty->getPrimitiveSizeInBits()); - } - - // We will happily inline static alloca instructions. - if (I.isStaticAlloca()) - return Base::visitAlloca(I); - - // FIXME: This is overly conservative. Dynamic allocas are inefficient for - // a variety of reasons, and so we would like to not inline them into - // functions which don't currently have a dynamic alloca. This simply - // disables inlining altogether in the presence of a dynamic alloca. - HasDynamicAlloca = true; - return false; -} - -bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - - // FIXME: We need to propagate SROA *disabling* through phi nodes, even - // though we don't want to propagate it's bonuses. The idea is to disable - // SROA if it *might* be used in an inappropriate manner. - - // Phi nodes are always zero-cost. - return true; -} - -bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), - SROAArg, CostIt); - - // Try to fold GEPs of constant-offset call site argument pointers. This - // requires target data and inbounds GEPs. - if (TD && I.isInBounds()) { - // Check if we have a base + offset for the pointer. - Value *Ptr = I.getPointerOperand(); - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); - if (BaseAndOffset.first) { - // Check if the offset of this GEP is constant, and if so accumulate it - // into Offset. - if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { - // Non-constant GEPs aren't folded, and disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return false; - } - - // Add the result as a new mapping to Base + Offset. - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also handle SROA candidates here, we already know that the GEP is - // all-constant indexed. - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - return true; - } - } - - if (isGEPOffsetConstant(I)) { - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - // Constant GEPs are modeled as free. - return true; - } - - // Variable GEPs will require math and will disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return false; -} - -bool CallAnalyzer::visitBitCast(BitCastInst &I) { - // Propagate constants through bitcasts. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offsets through casts - std::pair BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); - // Casts don't change the offset, just wrap it up. 
- if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also look for SROA candidates here. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - // Bitcasts are always zero cost. - return true; -} - -bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { - // Propagate constants through ptrtoint. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offset pairs when converted to a plain integer provided the - // integer is large enough to represent the pointer. - unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (TD && IntegerSize >= TD->getPointerSizeInBits()) { - std::pair BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - } - - // This is really weird. Technically, ptrtoint will disable SROA. However, - // unless that ptrtoint is *used* somewhere in the live basic blocks after - // inlining, it will be nuked, and SROA should proceed. All of the uses which - // would block SROA would also block SROA if applied directly to a pointer, - // and so we can just add the integer in here. The only places where SROA is - // preserved either cannot fire on an integer, or won't in-and-of themselves - // disable SROA (ext) w/o some later use that we would see and disable. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); -} - -bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { - // Propagate constants through ptrtoint. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offset pairs when round-tripped through a pointer without - // modifications provided the integer is not too large. - Value *Op = I.getOperand(0); - unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (TD && IntegerSize <= TD->getPointerSizeInBits()) { - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - } - - // "Propagate" SROA here in the same manner as we do for ptrtoint above. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); -} - -bool CallAnalyzer::visitCastInst(CastInst &I) { - // Propagate constants through ptrtoint. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. 
- disableSROA(I.getOperand(0)); - - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); -} - -bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { - Value *Operand = I.getOperand(0); - Constant *Ops[1] = { dyn_cast(Operand) }; - if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) - if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), - Ops, TD)) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable any SROA on the argument to arbitrary unary operators. - disableSROA(Operand); - - return false; -} - -bool CallAnalyzer::visitICmp(ICmpInst &I) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - // First try to handle simplified comparisons. - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast(LHS)) - if (Constant *CRHS = dyn_cast(RHS)) - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - return true; - } - - // Otherwise look for a comparison between constant offset pointers with - // a common base. - Value *LHSBase, *RHSBase; - APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); - if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); - if (RHSBase && LHSBase == RHSBase) { - // We have common bases, fold the icmp to a constant based on the - // offsets. - Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); - Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - ++NumConstantPtrCmps; - return true; - } - } - } - - // If the comparison is an equality comparison with null, we can simplify it - // for any alloca-derived argument. - if (I.isEquality() && isa(I.getOperand(1))) - if (isAllocaDerivedArg(I.getOperand(0))) { - // We can actually predict the result of comparisons between an - // alloca-derived value and null. Note that this fires regardless of - // SROA firing. - bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; - SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) - : ConstantInt::getFalse(I.getType()); - return true; - } - - // Finally check for SROA candidates in comparisons. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (isa(I.getOperand(1))) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitSub(BinaryOperator &I) { - // Try to handle a special case: we can fold computing the difference of two - // constant-related pointers. - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - Value *LHSBase, *RHSBase; - APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); - if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); - if (RHSBase && LHSBase == RHSBase) { - // We have common bases, fold the subtract to a constant based on the - // offsets. 
- Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); - Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); - if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { - SimplifiedValues[&I] = C; - ++NumConstantPtrDiffs; - return true; - } - } - } - - // Otherwise, fall back to the generic logic for simplifying and handling - // instructions. - return Base::visitSub(I); -} - -bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); - if (Constant *C = dyn_cast_or_null(SimpleV)) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable any SROA on arguments to arbitrary, unsimplified binary operators. - disableSROA(LHS); - disableSROA(RHS); - - return false; -} - -bool CallAnalyzer::visitLoad(LoadInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitStore(StoreInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { - // Constant folding for extract value is trivial. - Constant *C = dyn_cast(I.getAggregateOperand()); - if (!C) - C = SimplifiedValues.lookup(I.getAggregateOperand()); - if (C) { - SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); - return true; - } - - // SROA can look through these but give them a cost. - return false; -} - -bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { - // Constant folding for insert value is trivial. - Constant *AggC = dyn_cast(I.getAggregateOperand()); - if (!AggC) - AggC = SimplifiedValues.lookup(I.getAggregateOperand()); - Constant *InsertedC = dyn_cast(I.getInsertedValueOperand()); - if (!InsertedC) - InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); - if (AggC && InsertedC) { - SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, - I.getIndices()); - return true; - } - - // SROA can look through these but give them a cost. - return false; -} - -/// \brief Try to simplify a call site. -/// -/// Takes a concrete function and callsite and tries to actually simplify it by -/// analyzing the arguments and call itself with instsimplify. Returns true if -/// it has simplified the callsite to some other entity (a constant), making it -/// free. -bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { - // FIXME: Using the instsimplify logic directly for this is inefficient - // because we have to continually rebuild the argument list even when no - // simplifications can be performed. Until that is fixed with remapping - // inside of instsimplify, directly constant fold calls here. - if (!canConstantFoldCallTo(F)) - return false; - - // Try to re-map the arguments to constants. 
-  SmallVector<Constant *, 4> ConstantArgs;
-  ConstantArgs.reserve(CS.arg_size());
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I) {
-    Constant *C = dyn_cast<Constant>(*I);
-    if (!C)
-      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
-    if (!C)
-      return false; // This argument doesn't map to a constant.
-
-    ConstantArgs.push_back(C);
-  }
-  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
-    SimplifiedValues[CS.getInstruction()] = C;
-    return true;
-  }
-
-  return false;
-}
-
-bool CallAnalyzer::visitCallSite(CallSite CS) {
-  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
-      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                      Attribute::ReturnsTwice)) {
-    // This aborts the entire analysis.
-    ExposesReturnsTwice = true;
-    return false;
-  }
-  if (CS.isCall() &&
-      cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
-    ContainsNoDuplicateCall = true;
-
-  if (Function *F = CS.getCalledFunction()) {
-    // When we have a concrete function, first try to simplify it directly.
-    if (simplifyCallSite(F, CS))
-      return true;
-
-    // Next check if it is an intrinsic we know about.
-    // FIXME: Lift this into part of the InstVisitor.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
-      switch (II->getIntrinsicID()) {
-      default:
-        return Base::visitCallSite(CS);
-
-      case Intrinsic::memset:
-      case Intrinsic::memcpy:
-      case Intrinsic::memmove:
-        // SROA can usually chew through these intrinsics, but they aren't free.
-        return false;
-      }
-    }
-
-    if (F == CS.getInstruction()->getParent()->getParent()) {
-      // This flag will fully abort the analysis, so don't bother with anything
-      // else.
-      IsRecursiveCall = true;
-      return false;
-    }
-
-    if (!callIsSmall(CS)) {
-      // We account for the average 1 instruction per call argument setup
-      // here.
-      Cost += CS.arg_size() * InlineConstants::InstrCost;
-
-      // Everything other than inline ASM will also have a significant cost
-      // merely from making the call.
-      if (!isa<InlineAsm>(CS.getCalledValue()))
-        Cost += InlineConstants::CallPenalty;
-    }
-
-    return Base::visitCallSite(CS);
-  }
-
-  // Otherwise we're in a very special case -- an indirect function call. See
-  // if we can be particularly clever about this.
-  Value *Callee = CS.getCalledValue();
-
-  // First, pay the price of the argument setup. We account for the average
-  // 1 instruction per call argument setup here.
-  Cost += CS.arg_size() * InlineConstants::InstrCost;
-
-  // Next, check if this happens to be an indirect function call to a known
-  // function in this inline context. If not, we've done all we can.
-  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
-  if (!F)
-    return Base::visitCallSite(CS);
-
-  // If we have a constant that we are calling as a function, we can peer
-  // through it and see the function target. This happens not infrequently
-  // during devirtualization and so we want to give it a hefty bonus for
-  // inlining, but cap that bonus in the event that inlining wouldn't pan
-  // out. Pretend to inline the function, with a custom threshold.
-  CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold);
-  if (CA.analyzeCall(CS)) {
-    // We were able to inline the indirect call! Subtract the cost from the
-    // bonus we want to apply, but don't go below zero.
-    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
-  }
-
-  return Base::visitCallSite(CS);
-}
-
-bool CallAnalyzer::visitInstruction(Instruction &I) {
-  // Some instructions are free.
All of the free intrinsics can also be
-  // handled by SROA, etc.
-  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
-    return true;
-
-  // We found something we don't understand or can't handle. Mark any SROA-able
-  // values in the operand list as no longer viable.
-  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
-    disableSROA(*OI);
-
-  return false;
-}
-
-
-/// \brief Analyze a basic block for its contribution to the inline cost.
-///
-/// This method walks the analyzer over every instruction in the given basic
-/// block and accounts for their cost during inlining at this callsite. It
-/// aborts early if the threshold has been exceeded or an impossible to inline
-/// construct has been detected. It returns false if inlining is no longer
-/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
-  for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
-       I != E; ++I) {
-    ++NumInstructions;
-    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
-      ++NumVectorInstructions;
-
-    // If the instruction simplified to a constant, there is no cost to this
-    // instruction. Visit the instructions using our InstVisitor to account for
-    // all of the per-instruction logic. The visit tree returns true if we
-    // consumed the instruction in any way, and false if the instruction's base
-    // cost should count against inlining.
-    if (Base::visit(I))
-      ++NumInstructionsSimplified;
-    else
-      Cost += InlineConstants::InstrCost;
-
-    // If visiting this instruction detected an uninlinable pattern, abort.
-    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
-      return false;
-
-    // If the caller is a recursive function then we don't want to inline
-    // functions which allocate a lot of stack space because it would increase
-    // the caller stack usage dramatically.
-    if (IsCallerRecursive &&
-        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
-      return false;
-
-    if (NumVectorInstructions > NumInstructions/2)
-      VectorBonus = FiftyPercentVectorBonus;
-    else if (NumVectorInstructions > NumInstructions/10)
-      VectorBonus = TenPercentVectorBonus;
-    else
-      VectorBonus = 0;
-
-    // Check if we've passed the threshold so we don't spin in huge basic
-    // blocks that will never inline.
-    if (Cost > (Threshold + VectorBonus))
-      return false;
-  }
-
-  return true;
-}
-
-/// \brief Compute the base pointer and cumulative constant offsets for V.
-///
-/// This strips all constant offsets off of V, leaving it the base pointer, and
-/// accumulates the total constant offset applied in the returned constant. It
-/// returns 0 if V is not a pointer, and returns the constant '0' if there are
-/// no constant offsets applied.
-ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
-  if (!TD || !V->getType()->isPointerTy())
-    return 0;
-
-  unsigned IntPtrWidth = TD->getPointerSizeInBits();
-  APInt Offset = APInt::getNullValue(IntPtrWidth);
-
-  // Even though we don't look through PHI nodes, we could be called on an
-  // instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet Visited; - Visited.insert(V); - do { - if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) - return 0; - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) - break; - V = GA->getAliasee(); - } else { - break; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } while (Visited.insert(V)); - - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); - return cast(ConstantInt::get(IntPtrTy, Offset)); -} - -/// \brief Analyze a call site for potential inlining. -/// -/// Returns true if inlining this call is viable, and false if it is not -/// viable. It computes the cost and adjusts the threshold based on numerous -/// factors and heuristics. If this method returns false but the computed cost -/// is below the computed threshold, then inlining was forcibly disabled by -/// some artifact of the routine. -bool CallAnalyzer::analyzeCall(CallSite CS) { - ++NumCallsAnalyzed; - - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - Threshold += SingleBBBonus; - - // Perform some tweaks to the cost and threshold based on the direct - // callsite information. - - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; - - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (TD && CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. - PointerType *PTy = cast(CS.getArgument(I)->getType()); - unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // DataLayout. - NumStores = std::min(NumStores, 8U); - - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } - } - - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && - &F == CS.getCalledFunction(); - if (OnlyOneCallAndLocalLinkage) - Cost += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. 
As such,
-  // there is little point in inlining this unless there is literally zero
-  // cost.
-  Instruction *Instr = CS.getInstruction();
-  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
-    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
-      Threshold = 1;
-  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
-    Threshold = 1;
-
-  // If this function uses the coldcc calling convention, prefer not to inline
-  // it.
-  if (F.getCallingConv() == CallingConv::Cold)
-    Cost += InlineConstants::ColdccPenalty;
-
-  // Check if we're done. This can happen due to bonuses and penalties.
-  if (Cost > Threshold)
-    return false;
-
-  if (F.empty())
-    return true;
-
-  Function *Caller = CS.getInstruction()->getParent()->getParent();
-  // Check if the caller function is recursive itself.
-  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
-       U != E; ++U) {
-    CallSite Site(cast<Value>(*U));
-    if (!Site)
-      continue;
-    Instruction *I = Site.getInstruction();
-    if (I->getParent()->getParent() == Caller) {
-      IsCallerRecursive = true;
-      break;
-    }
-  }
-
-  // Track whether we've seen a return instruction. The first return
-  // instruction is free, as at least one will usually disappear in inlining.
-  bool HasReturn = false;
-
-  // Populate our simplified values by mapping from function arguments to call
-  // arguments with known important simplifications.
-  CallSite::arg_iterator CAI = CS.arg_begin();
-  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
-       FAI != FAE; ++FAI, ++CAI) {
-    assert(CAI != CS.arg_end());
-    if (Constant *C = dyn_cast<Constant>(CAI))
-      SimplifiedValues[FAI] = C;
-
-    Value *PtrArg = *CAI;
-    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
-      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
-
-      // We can SROA any pointer arguments derived from alloca instructions.
-      if (isa<AllocaInst>(PtrArg)) {
-        SROAArgValues[FAI] = PtrArg;
-        SROAArgCosts[PtrArg] = 0;
-      }
-    }
-  }
-  NumConstantArgs = SimplifiedValues.size();
-  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
-  NumAllocaArgs = SROAArgValues.size();
-
-  // The worklist of live basic blocks in the callee *after* inlining. We avoid
-  // adding basic blocks of the callee which can be proven to be dead for this
-  // particular call site in order to get more accurate cost estimates. This
-  // requires a somewhat heavyweight iteration pattern: we need to walk the
-  // basic blocks in a breadth-first order as we insert live successors. To
-  // accomplish this, prioritizing for small iterations because we exit after
-  // crossing our threshold, we use a small-size optimized SetVector.
-  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
-                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
-  BBSetVector BBWorklist;
-  BBWorklist.insert(&F.getEntryBlock());
-  // Note that we *must not* cache the size, this loop grows the worklist.
-  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
-    // Bail out the moment we cross the threshold. This means we'll under-count
-    // the cost, but only when undercounting doesn't matter.
-    if (Cost > (Threshold + VectorBonus))
-      break;
-
-    BasicBlock *BB = BBWorklist[Idx];
-    if (BB->empty())
-      continue;
-
-    // Handle the terminator cost here where we can track returns and other
-    // function-wide constructs.
-    TerminatorInst *TI = BB->getTerminator();
-
-    // We never want to inline functions that contain an indirectbr.
This is
-    // incorrect because all the blockaddresses (in static global initializers
-    // for example) would be referring to the original function, and this
-    // indirect jump would jump from the inlined copy of the function into the
-    // original function which is extremely undefined behavior.
-    // FIXME: This logic isn't really right; we can safely inline functions
-    // with indirectbr's as long as no other function or global references the
-    // blockaddress of a block within the current function. And as a QOI issue,
-    // if someone is using a blockaddress without an indirectbr, and that
-    // reference somehow ends up in another function or global, we probably
-    // don't want to inline this function.
-    if (isa<IndirectBrInst>(TI))
-      return false;
-
-    if (!HasReturn && isa<ReturnInst>(TI))
-      HasReturn = true;
-    else
-      Cost += InlineConstants::InstrCost;
-
-    // Analyze the cost of this block. If we blow through the threshold, this
-    // returns false, and we can bail out.
-    if (!analyzeBlock(BB)) {
-      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
-        return false;
-
-      // If the caller is a recursive function then we don't want to inline
-      // functions which allocate a lot of stack space because it would increase
-      // the caller stack usage dramatically.
-      if (IsCallerRecursive &&
-          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
-        return false;
-
-      break;
-    }
-
-    // Add in the live successors by first checking whether we have a
-    // terminator that may be simplified based on the values simplified by
-    // this call.
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      if (BI->isConditional()) {
-        Value *Cond = BI->getCondition();
-        if (ConstantInt *SimpleCond
-              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
-          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
-          continue;
-        }
-      }
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-      Value *Cond = SI->getCondition();
-      if (ConstantInt *SimpleCond
-            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
-        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
-        continue;
-      }
-    }
-
-    // If we're unable to select a particular successor, just count all of
-    // them.
-    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
-         ++TIdx)
-      BBWorklist.insert(TI->getSuccessor(TIdx));
-
-    // If we had any successors at this point, then post-inlining is likely to
-    // have them as well. Note that we assume any basic blocks which existed
-    // due to branches or switches which folded above will also fold after
-    // inlining.
-    if (SingleBB && TI->getNumSuccessors() > 1) {
-      // Take off the bonus we applied to the threshold.
-      Threshold -= SingleBBBonus;
-      SingleBB = false;
-    }
-  }
-
-  // If this is a noduplicate call, we can still inline as long as
-  // inlining this would cause the removal of the caller (so the instruction
-  // is not actually duplicated, just moved).
-  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
-    return false;
-
-  Threshold += VectorBonus;
-
-  return Cost < Threshold;
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-/// \brief Dump stats about this call's analysis.
-void CallAnalyzer::dump() { -#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" - DEBUG_PRINT_STAT(NumConstantArgs); - DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); - DEBUG_PRINT_STAT(NumAllocaArgs); - DEBUG_PRINT_STAT(NumConstantPtrCmps); - DEBUG_PRINT_STAT(NumConstantPtrDiffs); - DEBUG_PRINT_STAT(NumInstructionsSimplified); - DEBUG_PRINT_STAT(SROACostSavings); - DEBUG_PRINT_STAT(SROACostSavingsLost); - DEBUG_PRINT_STAT(ContainsNoDuplicateCall); -#undef DEBUG_PRINT_STAT -} -#endif - -INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) -INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) -INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) - -char InlineCostAnalysis::ID = 0; - -InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} - -InlineCostAnalysis::~InlineCostAnalysis() {} - -void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); - CallGraphSCCPass::getAnalysisUsage(AU); -} - -bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { - TD = getAnalysisIfAvailable(); - TTI = &getAnalysis(); - return false; -} - -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { - return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} - -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, - int Threshold) { - // Cannot inline indirect calls. - if (!Callee) - return llvm::InlineCost::getNever(); - - // Calls to functions with always-inline attributes should be inlined - // whenever possible. - if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline)) { - if (isInlineViable(*Callee)) - return llvm::InlineCost::getAlways(); - return llvm::InlineCost::getNever(); - } - - // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline or call sites - // marked noinline. - if (Callee->mayBeOverridden() || - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoInline) || - CS.isNoInline()) - return llvm::InlineCost::getNever(); - - DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "...\n"); - - CallAnalyzer CA(TD, *TTI, *Callee, Threshold); - bool ShouldInline = CA.analyzeCall(CS); - - DEBUG(CA.dump()); - - // Check if there was a reason to force inlining or no inlining. - if (!ShouldInline && CA.getCost() < CA.getThreshold()) - return InlineCost::getNever(); - if (ShouldInline && CA.getCost() >= CA.getThreshold()) - return InlineCost::getAlways(); - - return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); -} - -bool InlineCostAnalysis::isInlineViable(Function &F) { - bool ReturnsTwice = - F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - // Disallow inlining of functions which contain an indirect branch. - if (isa(BI->getTerminator())) - return false; - - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - CallSite CS(II); - if (!CS) - continue; - - // Disallow recursive calls. - if (&F == CS.getCalledFunction()) - return false; - - // Disallow calls which expose returns-twice to a function not previously - // attributed as such. 
- if (!ReturnsTwice && CS.isCall() && - cast(CS.getInstruction())->canReturnTwice()) - return false; - } - } - - return true; -} -- cgit v1.1 From a5157e68d183e1bdf010e94a15dc0c44b65f889b Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 21 Jan 2013 13:04:33 +0000 Subject: Switch CodeMetrics itself over to use TTI to determine if an instruction is free. The whole CodeMetrics API should probably be reworked more, but this is enough to allow deleting the duplicate code there for computing whether an instruction is free. All of the passes using this have been updated to pull in TTI and hand it to the CodeMetrics stuff. Further, a dead CodeMetrics API (analyzeFunction) is nuked for lack of users. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173036 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/CodeMetrics.cpp | 82 ++------------------------------ lib/Analysis/TargetTransformInfo.cpp | 3 ++ lib/Transforms/Scalar/LoopRotation.cpp | 7 ++- lib/Transforms/Scalar/LoopUnrollPass.cpp | 12 +++-- lib/Transforms/Scalar/LoopUnswitch.cpp | 11 +++-- 5 files changed, 27 insertions(+), 88 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 1dff3d4..073234b 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -54,77 +55,15 @@ bool llvm::callIsSmall(ImmutableCallSite CS) { return false; } -bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { - if (isa(I)) - return true; - - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (const GetElementPtrInst *GEP = dyn_cast(I)) - return GEP->hasAllConstantIndices(); - - if (const IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - return true; - } - } - - if (const CastInst *CI = dyn_cast(I)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast()) - return true; - - Value *Op = CI->getOperand(0); - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - if (isa(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits()) - return true; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. - if (isa(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits()) - return true; - - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). 
- if (TD && isa(CI) && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - return true; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa(CI->getOperand(0))) - return true; - } - - return false; -} - /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const DataLayout *TD) { + const TargetTransformInfo &TTI) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (isInstructionFree(II, TD)) + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&*II)) continue; // Special handling for calls. @@ -195,18 +134,3 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // Remember NumInsts for this BB. NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } - -void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) { - // If this function contains a call that "returns twice" (e.g., setjmp or - // _setjmp) and it isn't marked with "returns twice" itself, never inline it. - // This is a hack because we depend on the user marking their local variables - // as volatile if they are live across a setjmp call, and they probably - // won't do this in callers. - exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReturnsTwice); - - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); -} diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 16ee6eb..99720d4 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -258,6 +258,9 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { } unsigned getUserCost(const User *U) const { + if (isa(U)) + return TCC_Free; // Model all PHI nodes as free. + if (const GEPOperator *GEP = dyn_cast(U)) // In the basic model we just assume that all-constant GEPs will be // folded into their uses via addressing modes. diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 0ea80f3..e98ae95 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -51,6 +52,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved(); + AU.addRequired(); } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -59,11 +61,13 @@ namespace { private: LoopInfo *LI; + const TargetTransformInfo *TTI; }; } char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -75,6 +79,7 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } /// the loop is rotated at least once. 
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis(); + TTI = &getAnalysis(); // Simplify the loop latch before attempting to rotate the header // upward. Rotation may not be needed if the loop tail can be folded into the @@ -278,7 +283,7 @@ bool LoopRotate::rotateLoop(Loop *L) { // duplicate blocks inside it. { CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader); + Metrics.analyzeBasicBlock(OrigHeader, *TTI); if (Metrics.notDuplicatable) { DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" << " instructions: "; L->dump()); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index e0f915b..80d060b 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" @@ -90,6 +91,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. @@ -101,6 +103,7 @@ namespace { char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -113,11 +116,12 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, const DataLayout *TD) { + bool &NotDuplicatable, + const TargetTransformInfo &TTI) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I, TD); + Metrics.analyzeBasicBlock(*I, TTI); NumCalls = Metrics.NumInlineCandidates; NotDuplicatable = Metrics.notDuplicatable; @@ -134,6 +138,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis(); ScalarEvolution *SE = &getAnalysis(); + const TargetTransformInfo &TTI = getAnalysis(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -181,11 +186,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. 
 if (Threshold != NoThreshold) {
-    const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
     unsigned NumInlineCandidates;
     bool notDuplicatable;
     unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
-                                            notDuplicatable, TD);
+                                            notDuplicatable, TTI);
     DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
     if (notDuplicatable) {
       DEBUG(dbgs() << "  Not unrolling loop which contains non duplicatable"
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 68d4423..0e8199f 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -101,7 +102,7 @@ namespace {
 
     // Analyze loop. Check its size and calculate whether it is possible to
    // unswitch it. Returns true if we can unswitch this loop.
-    bool countLoop(const Loop* L);
+    bool countLoop(const Loop* L, const TargetTransformInfo &TTI);
 
     // Clean all data related to given loop.
    void forgetLoop(const Loop* L);
@@ -170,6 +171,7 @@ namespace {
       AU.addPreservedID(LCSSAID);
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<ScalarEvolution>();
+      AU.addRequired<TargetTransformInfo>();
     }
 
   private:
@@ -221,7 +223,7 @@ namespace {
 
 // Analyze loop. Check its size and calculate whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop* L) {
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
 
   std::pair<LoopPropsMapIt, bool> InsertRes =
     LoopsProperties.insert(std::make_pair(L, LoopProperties()));
@@ -243,7 +245,7 @@ bool LUAnalysisCache::countLoop(const Loop* L) {
 
     for (Loop::block_iterator I = L->block_begin(),
            E = L->block_end();
         I != E; ++I)
-      Metrics.analyzeBasicBlock(*I);
+      Metrics.analyzeBasicBlock(*I, TTI);
 
     Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
     Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
@@ -334,6 +336,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
 char LoopUnswitch::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
                       false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
 INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -424,7 +427,7 @@ bool LoopUnswitch::processCurrentLoop() {
 
   // We have probably reached the quota of branches for this loop. If so,
  // stop unswitching.
-  if (!BranchesInfo.countLoop(currentLoop))
+  if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
     return false;
 
   // Loop over all of the basic blocks in the loop. If we find an interior
-- 
cgit v1.1 


From 9262a64b307621a046ef5728d90bef4921b46108 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Mon, 21 Jan 2013 14:06:48 +0000
Subject: R600/SILowerControlFlow.cpp: Fix a warning.
[-Wunused-variable]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173040 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/SILowerControlFlow.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 3780e40..1b0dbcc 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -291,9 +291,8 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
   DebugLoc DL = MI.getDebugLoc();
 
   // Kill is only allowed in pixel shaders
-  MachineFunction &MF = *MBB.getParent();
-  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  assert(Info->ShaderType == ShaderType::PIXEL);
+  assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+         ShaderType::PIXEL);
 
   // Clear this pixel from the exec mask if the operand is negative
   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
-- 
cgit v1.1 


From c0e01ddda0ce5e4ae9deff9ad2b577564fcb1e55 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 21 Jan 2013 15:40:46 +0000
Subject: R600/SI: Derive all sample intrinsics from a single class.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch by: Michel Dänzer

Reviewed-by: Tom Stellard
Reviewed-by: Christian König
Signed-off-by: Michel Dänzer

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173051 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/SIIntrinsics.td | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index c322fef..0d1064e 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -22,9 +22,11 @@ let TargetPrefix = "SI", isTarget = 1 in {
 
   def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
   def int_SI_wqm : Intrinsic <[], [], []>;
 
-  def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
-  def int_SI_sample_bias : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
-  def int_SI_sample_lod : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+
+  def int_SI_sample : Sample;
+  def int_SI_sample_bias : Sample;
+  def int_SI_sample_lod : Sample;
 
 /* Interpolation Intrinsics */
-- 
cgit v1.1 


From ca0e340f79287f806a25e3224e9de2c22d690f9e Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 21 Jan 2013 15:40:47 +0000
Subject: R600/SI: Take target parameter for sample intrinsics.
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Michel Dänzer Reviewed-by: Tom Stellard Reviewed-by: Christian König Signed-off-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173052 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 6 +++--- lib/Target/R600/SIIntrinsics.td | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index cac42da..0f9d572 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1186,21 +1186,21 @@ def : Pat < /* int_SI_sample */ def : Pat < - (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), + (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler) >; /* int_SI_sample_lod */ def : Pat < - (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), + (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler) >; /* int_SI_sample_bias */ def : Pat < - (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), + (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler) >; diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 0d1064e..4393144 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -22,7 +22,7 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; def int_SI_wqm : Intrinsic <[], [], []>; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>; def int_SI_sample : Sample; def int_SI_sample_bias : Sample; -- cgit v1.1 From 97ff618b0625e31e9690ce406981fa5ef3cfbfc3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Jan 2013 15:40:48 +0000 Subject: R600/SI: Use unnormalized coordinates for sampling with the RECT target. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Michel Dänzer Reviewed-by: Tom Stellard Reviewed-by: Christian König Signed-off-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173053 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 7 +++++++ lib/Target/R600/SIInstructions.td | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 64bab18..b15ea76 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -342,6 +342,13 @@ def TEX_SHADOW : PatLeaf< }] >; +def TEX_RECT : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 5; + }] +>; + class EG_CF_RAT cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, dag ins, string asm, list pattern> : InstR600ISA { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0f9d572..4164c55 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1191,6 +1191,12 @@ def : Pat < SReg_256:$rsrc, SReg_128:$sampler) >; +def : Pat < + (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), + (IMAGE_SAMPLE imm:$writemask, 1, 0, 0, 0, 0, 0, 0, VReg_128:$coord, + SReg_256:$rsrc, SReg_128:$sampler) +>; + /* int_SI_sample_lod */ def : Pat < (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), -- cgit v1.1 From dc0a5fd226f85711018663526b4acd1c886367d3 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 21 Jan 2013 17:59:18 +0000 Subject: Fix a comment. Induction vars dont need to start at zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173061 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index ba8987d..447f24a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2562,7 +2562,7 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) return IK_NoInduction; - // Check that the PHI is consecutive and starts at zero. + // Check that the PHI is consecutive. const SCEV *PhiScev = SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast(PhiScev); if (!AR) { -- cgit v1.1 From fd0543d9be9886398628e411aba8c392b28d17b6 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 21 Jan 2013 18:04:19 +0000 Subject: rename "SkipToWord" to "SkipToFourByteBoundary" since a word is not always 4 bytes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173062 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 83df57b..eb8b5de 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -78,7 +78,7 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { // Get the codesize of this block. 
CurCodeSize = ReadVBR(bitc::CodeLenWidth); - SkipToWord(); + SkipToFourByteBoundary(); unsigned NumWords = Read(bitc::BlockSizeWidth); if (NumWordsP) *NumWordsP = NumWords; @@ -181,7 +181,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) { assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. unsigned NumElts = ReadVBR(6); - SkipToWord(); // 32-bit alignment + SkipToFourByteBoundary(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. size_t NewEnd = NextChar+((NumElts+3)&~3); @@ -241,7 +241,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. unsigned NumElts = ReadVBR(6); - SkipToWord(); // 32-bit alignment + SkipToFourByteBoundary(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. size_t NewEnd = NextChar+((NumElts+3)&~3); -- cgit v1.1 From 47543a8a66fb9451126f134808b55853aca57e1c Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 21 Jan 2013 18:18:25 +0000 Subject: wean Blob handling logic off of banging on NextChar directly. Instead, make it reason about the current bit position, which is always independent of the underlying cursors word size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173063 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index eb8b5de..92133bb 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -184,17 +184,17 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) { SkipToFourByteBoundary(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. - size_t NewEnd = NextChar+((NumElts+3)&~3); + size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8; // If this would read off the end of the bitcode file, just set the // record to empty and return. - if (!canSkipToPos(NewEnd)) { + if (!canSkipToPos(NewEnd/8)) { NextChar = BitStream->getBitcodeBytes().getExtent(); break; } // Skip over the blob. - NextChar = NewEnd; + JumpToBit(NewEnd); } } @@ -244,11 +244,12 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, SkipToFourByteBoundary(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. - size_t NewEnd = NextChar+((NumElts+3)&~3); + size_t CurBitPos = GetCurrentBitNo(); + size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8; // If this would read off the end of the bitcode file, just set the // record to empty and return. - if (!canSkipToPos(NewEnd)) { + if (!canSkipToPos(NewEnd/8)) { Vals.append(NumElts, 0); NextChar = BitStream->getBitcodeBytes().getExtent(); break; @@ -259,14 +260,16 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, if (Blob) { *Blob = StringRef((const char*)BitStream->getBitcodeBytes().getPointer( - NextChar, NumElts), - NumElts); + CurBitPos/8, NumElts), + NumElts); } else { - for (; NumElts; ++NextChar, --NumElts) - Vals.push_back(getByte(NextChar)); + // FIXME: This is a brutally inefficient way to do this. Why isn't this + // just using getPointer? + for (; NumElts; --NumElts) + Vals.push_back(Read(8)); } // Skip over tail padding. 
- NextChar = NewEnd; + JumpToBit(NewEnd); } unsigned Code = (unsigned)Vals[0]; -- cgit v1.1 From afe77f33b2a361ed0d001596dcdde0e16d57abee Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 21 Jan 2013 18:18:53 +0000 Subject: Introduce a new data structure, the SparseMultiSet, and changes to the MI scheduler to use it. A SparseMultiSet adds multiset behavior to SparseSet, while retaining SparseSet's desirable properties. Essentially, SparseMultiSet provides multiset behavior by storing its dense data in doubly linked lists that are inlined into the dense vector. This allows it to provide good data locality as well as vector-like constant-time clear() and fast constant time find(), insert(), and erase(). It also allows SparseMultiSet to have a builtin recycler rather than keeping SparseSet's behavior of always swapping upon removal, which allows it to preserve more iterators. It's often a better alternative to a SparseSet of a growable container or vector-of-vector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173064 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAGInstrs.cpp | 78 +++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 662fc0e..411c46b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -168,20 +168,6 @@ void ScheduleDAGInstrs::finishBlock() { BB = 0; } -/// Initialize the map with the number of registers. -void Reg2SUnitsMap::setRegLimit(unsigned Limit) { - PhysRegSet.setUniverse(Limit); - SUnits.resize(Limit); -} - -/// Clear the map without deallocating storage. -void Reg2SUnitsMap::clear() { - for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { - SUnits[*I].clear(); - } - PhysRegSet.clear(); -} - /// Initialize the DAG and common scheduler state for the current scheduling /// region. This does not actually create the DAG, only clears it. The /// scheduling driver may call BuildSchedGraph multiple times per scheduling @@ -228,7 +214,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); if (MO.readsReg()) // ignore undef operands @@ -245,7 +231,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); } } } @@ -263,15 +249,14 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i].SU; + for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { + SUnit *UseSU = I->SU; if (UseSU == SU) continue; // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. 
- int UseOp = UseList[i].OpIdx; + int UseOp = I->OpIdx; MachineInstr *RegUse = 0; SDep Dep; if (UseOp < 0) @@ -311,9 +296,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector &DefList = Defs[*Alias]; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i].SU; + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { + SUnit *DefSU = I->SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -337,33 +321,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); + Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); } else { addPhysRegDataDeps(SU, OperIdx); - - // Either insert a new Reg2SUnits entry with an empty SUnits list, or - // retrieve the existing SUnits list for this register's defs. - std::vector &DefList = Defs[MO.getReg()]; + unsigned Reg = MO.getReg(); // clear this register's use list - if (Uses.contains(MO.getReg())) - Uses[MO.getReg()].clear(); - - if (!MO.isDead()) - DefList.clear(); - - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - if (SU->isCall) { - while (!DefList.empty() && DefList.back().SU->isCall) - DefList.pop_back(); + if (Uses.contains(Reg)) + Uses.eraseAll(Reg); + + if (!MO.isDead()) { + Defs.eraseAll(Reg); + } else if (SU->isCall) { + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); + } } + // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(PhysRegSUOper(SU, OperIdx)); + Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); } } @@ -726,8 +714,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses"); - Defs.setRegLimit(TRI->getNumRegs()); - Uses.setRegLimit(TRI->getNumRegs()); + Defs.setUniverse(TRI->getNumRegs()); + Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); // FIXME: Allow SparseSet to reserve space for the creation of virtual -- cgit v1.1 From 69582cf6c46456ad542df5aa09c47700c9525645 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 21 Jan 2013 18:24:49 +0000 Subject: Fix a heinous inefficiency introduced in r149918, wherein reading each byte of a BLOB (i.e., large, performance intensive data) in a bitcode file was switched to invoking one virtual method call per byte read. Now we do one virtual call per BLOB. 
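For illustration only, a minimal standalone C++ sketch of the two strategies this commit contrasts; the ByteSource interface below is a hypothetical stand-in for the streamer, and only the getPointer idea mirrors the real API in the diff that follows:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the underlying byte streamer.
struct ByteSource {
  virtual ~ByteSource() {}
  virtual uint8_t readByte(size_t Pos) = 0;                      // one virtual call per byte
  virtual const uint8_t *getPointer(size_t Pos, size_t Len) = 0; // one virtual call per blob
};

// Before: one virtual call for every byte of the blob.
void readBlobSlow(ByteSource &S, size_t Byte, unsigned NumElts,
                  std::vector<uint8_t> &Vals) {
  for (; NumElts; ++Byte, --NumElts)
    Vals.push_back(S.readByte(Byte));
}

// After: a single virtual call to get a pointer, then a plain memory copy.
void readBlobFast(ByteSource &S, size_t Byte, unsigned NumElts,
                  std::vector<uint8_t> &Vals) {
  const uint8_t *Ptr = S.getPointer(Byte, NumElts);
  Vals.insert(Vals.end(), Ptr, Ptr + NumElts);
}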
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173065 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 92133bb..7984512 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -255,18 +255,17 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, break; } - // Otherwise, read the number of bytes. If we can return a reference to - // the data, do so to avoid copying it. + // Otherwise, inform the streamer that we need these bytes in memory. + const char *Ptr = (const char*) + BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts); + + // If we can return a reference to the data, do so to avoid copying it. if (Blob) { - *Blob = - StringRef((const char*)BitStream->getBitcodeBytes().getPointer( - CurBitPos/8, NumElts), - NumElts); + *Blob = StringRef(Ptr, NumElts); } else { - // FIXME: This is a brutally inefficient way to do this. Why isn't this - // just using getPointer? + // Otherwise, unpack into Vals with zero extension. for (; NumElts; --NumElts) - Vals.push_back(Read(8)); + Vals.push_back((unsigned char)*Ptr++); } // Skip over tail padding. JumpToBit(NewEnd); -- cgit v1.1 From a3458380b97bd732baf84f1bf0d21f215bf99727 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Mon, 21 Jan 2013 20:32:54 +0000 Subject: Use correct format for the LDAWCP instruction (u6). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173083 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 4018e31..20bdad2 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -688,15 +688,11 @@ def BRFU_lu6 : _FLU6< //let Uses = [CP] in ... let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a), - "ldaw r11, cp[$a]", - []>; +def LDAWCP_u6: _FU6<(outs), (ins MEMii:$a), "ldaw r11, cp[$a]", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAWCP_lu6: _FLRU6< - (outs), (ins MEMii:$a), - "ldaw r11, cp[$a]", - [(set R11, ADDRcpii:$a)]>; +def LDAWCP_lu6: _FLU6<(outs), (ins MEMii:$a), "ldaw r11, cp[$a]", + [(set R11, ADDRcpii:$a)]>; defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>; -- cgit v1.1 From 9b709f8b3f3fa6e9bfb5007b70e096f6192f3ef8 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Mon, 21 Jan 2013 20:42:16 +0000 Subject: Add instruction encoding / disassembly support for ru6 / lru6 instructions. 
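As a worked sketch of the layouts that the _FRU6/_FLRU6 format classes in the diff below define (the encoder helpers here are ours, for illustration only): the short form packs opc:6, a:4, b:6 into one half-word, and the long form keeps that half-word while a 0b111100-marked companion half-word carries the high ten bits of the 16-bit immediate.

#include <cstdint>

// ru6: Inst{15-10}=opc, Inst{9-6}=a, Inst{5-0}=b
uint16_t encodeRU6(unsigned Opc, unsigned A, unsigned B) {
  return (uint16_t)((Opc << 10) | ((A & 0xF) << 6) | (B & 0x3F));
}

// lru6: Inst{31-26}=opc, Inst{25-22}=a, Inst{21-16}=b{5-0},
//       Inst{15-10}=0b111100, Inst{9-0}=b{15-6}
uint32_t encodeLRU6(unsigned Opc, unsigned A, unsigned B) {
  uint32_t Hi = (Opc << 10) | ((A & 0xF) << 6) | (B & 0x3F); // Inst{31-16}
  uint32_t Lo = (0x3Cu << 10) | ((B >> 6) & 0x3FF);          // Inst{15-0}
  return (Hi << 16) | Lo;
}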
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173085 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 10 ++ lib/Target/XCore/XCoreInstrFormats.td | 18 ++- lib/Target/XCore/XCoreInstrInfo.td | 163 +++++++++------------ 3 files changed, 97 insertions(+), 94 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index d24d947..e6861bf 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -92,6 +92,9 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -192,6 +195,13 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } +static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(Val)); + Inst.addOperand(MCOperand::CreateImm(0)); + return MCDisassembler::Success; +} + static DecodeStatus Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) { unsigned Combined = fieldFromInstruction(Insn, 6, 5); diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 01ed5cb..fa5c0a2 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -85,12 +85,26 @@ class _FL2RUSBitp opc, dag outs, dag ins, string asmstr, let DecoderMethod = "DecodeL2RUSBitpInstruction"; } -class _FRU6 pattern> +class _FRU6 opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + bits<4> a; + bits<6> b; + + let Inst{15-10} = opc; + let Inst{9-6} = a; + let Inst{5-0} = b; } -class _FLRU6 pattern> +class _FLRU6 opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<4> a; + bits<16> b; + + let Inst{31-26} = opc; + let Inst{25-22} = a; + let Inst{21-16} = b{5-0}; + let Inst{15-10} = 0b111100; + let Inst{9-0} = b{15-6}; } class _FU6 pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 20bdad2..5ca2b0f 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -182,6 +182,7 @@ def ADDRcpii : ComplexPattern { let PrintMethod = "printMemOperand"; + let DecoderMethod = "DecodeMEMiiOperand"; let MIOperandInfo = (ops i32imm, i32imm); } @@ -265,26 +266,25 @@ class FL3R opc, string OpcStr, SDNode OpNode> : // Register - U6 // Operand register - U6 -multiclass FRU6_LRU6_branch { - def _ru6: _FRU6< - (outs), (ins GRRegs:$cond, brtarget:$dest), - !strconcat(OpcStr, " $cond, $dest"), - []>; - def _lru6: _FLRU6< - (outs), (ins GRRegs:$cond, brtarget:$dest), - !strconcat(OpcStr, " $cond, $dest"), - []>; +multiclass FRU6_LRU6_branch opc, string OpcStr> { + def _ru6: _FRU6; + def _lru6: _FLRU6; } -multiclass FRU6_LRU6_cp { - def _ru6: _FRU6< - (outs GRRegs:$dst), (ins i32imm:$a), - !strconcat(OpcStr, " $dst, cp[$a]"), - []>; - def _lru6: _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$a), - !strconcat(OpcStr, " $dst, cp[$a]"), - []>; +multiclass FRU6_LRU6_backwards_branch opc, string OpcStr> { + def _ru6: _FRU6; + def _lru6: _FLRU6; +} + 
+multiclass FRU6_LRU6_cp opc, string OpcStr> { + def _ru6: _FRU6; + def _lru6: _FLRU6; } // U6 @@ -537,113 +537,92 @@ def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), //let Uses = [DP] in ... let neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a), - "ldaw $dst, dp[$a]", - []>; +def LDAWDP_ru6: _FRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b), + "ldaw $a, dp[$b]", []>; let isReMaterializable = 1 in -def LDAWDP_lru6: _FLRU6< - (outs GRRegs:$dst), (ins MEMii:$a), - "ldaw $dst, dp[$a]", - [(set GRRegs:$dst, ADDRdpii:$a)]>; +def LDAWDP_lru6: _FLRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b), + "ldaw $a, dp[$b]", + [(set GRRegs:$a, ADDRdpii:$b)]>; let mayLoad=1 in -def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a), - "ldw $dst, dp[$a]", - []>; - -def LDWDP_lru6: _FLRU6< - (outs GRRegs:$dst), (ins MEMii:$a), - "ldw $dst, dp[$a]", - [(set GRRegs:$dst, (load ADDRdpii:$a))]>; +def LDWDP_ru6: _FRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b), + "ldw $a, dp[$b]", []>; + +def LDWDP_lru6: _FLRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b), + "ldw $a, dp[$b]", + [(set GRRegs:$a, (load ADDRdpii:$b))]>; let mayStore=1 in -def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr), - "stw $val, dp[$addr]", - []>; +def STWDP_ru6 : _FRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b), + "stw $a, dp[$b]", []>; -def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr), - "stw $val, dp[$addr]", - [(store GRRegs:$val, ADDRdpii:$addr)]>; +def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b), + "stw $a, dp[$b]", + [(store GRRegs:$a, ADDRdpii:$b)]>; //let Uses = [CP] in .. let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in -defm LDWCP : FRU6_LRU6_cp<"ldw">; +defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">; let Uses = [SP] in { let mayStore=1 in { -def STWSP_ru6 : _FRU6< - (outs), (ins GRRegs:$val, i32imm:$index), - "stw $val, sp[$index]", - [(XCoreStwsp GRRegs:$val, immU6:$index)]>; - -def STWSP_lru6 : _FLRU6< - (outs), (ins GRRegs:$val, i32imm:$index), - "stw $val, sp[$index]", - [(XCoreStwsp GRRegs:$val, immU16:$index)]>; +def STWSP_ru6 : _FRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b), + "stw $a, sp[$b]", + [(XCoreStwsp GRRegs:$a, immU6:$b)]>; + +def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b), + "stw $a, sp[$b]", + [(XCoreStwsp GRRegs:$a, immU16:$b)]>; } let mayLoad=1 in { -def LDWSP_ru6 : _FRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldw $dst, sp[$b]", - []>; +def LDWSP_ru6 : _FRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b), + "ldw $a, sp[$b]", []>; -def LDWSP_lru6 : _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldw $dst, sp[$b]", - []>; +def LDWSP_lru6 : _FLRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b), + "ldw $a, sp[$b]", []>; } let neverHasSideEffects = 1 in { -def LDAWSP_ru6 : _FRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +def LDAWSP_ru6 : _FRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; -def LDAWSP_lru6 : _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +def LDAWSP_lru6 : _FLRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; -def LDAWSP_ru6_RRegs : _FRU6< - (outs RRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +let isCodeGenOnly = 1 in +def LDAWSP_ru6_RRegs : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; -def LDAWSP_lru6_RRegs : _FLRU6< - (outs RRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +let isCodeGenOnly = 1 
in +def LDAWSP_lru6_RRegs : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; } } let isReMaterializable = 1 in { -def LDC_ru6 : _FRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldc $dst, $b", - [(set GRRegs:$dst, immU6:$b)]>; - -def LDC_lru6 : _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldc $dst, $b", - [(set GRRegs:$dst, immU16:$b)]>; +def LDC_ru6 : _FRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b), + "ldc $a, $b", [(set GRRegs:$a, immU6:$b)]>; + +def LDC_lru6 : _FLRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b), + "ldc $a, $b", [(set GRRegs:$a, immU16:$b)]>; } -def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val), - "setc res[$r], $val", - [(int_xcore_setc GRRegs:$r, immU6:$val)]>; +def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b), + "setc res[$a], $b", + [(int_xcore_setc GRRegs:$a, immU6:$b)]>; -def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val), - "setc res[$r], $val", - [(int_xcore_setc GRRegs:$r, immU16:$val)]>; +def SETC_lru6 : _FLRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b), + "setc res[$a], $b", + [(int_xcore_setc GRRegs:$a, immU16:$b)]>; // Operand register - U6 let isBranch = 1, isTerminator = 1 in { -defm BRFT: FRU6_LRU6_branch<"bt">; -defm BRBT: FRU6_LRU6_branch<"bt">; -defm BRFF: FRU6_LRU6_branch<"bf">; -defm BRBF: FRU6_LRU6_branch<"bf">; +defm BRFT: FRU6_LRU6_branch<0b011100, "bt">; +defm BRBT: FRU6_LRU6_backwards_branch<0b011101, "bt">; +defm BRFF: FRU6_LRU6_branch<0b011110, "bf">; +defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">; } // U6 -- cgit v1.1 From 8da543434664986ac19f4753a691fb613ba80778 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Mon, 21 Jan 2013 20:44:17 +0000 Subject: Add instruction encodings / disassembly support for u6 / lu6 instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173086 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrFormats.td | 14 +++++- lib/Target/XCore/XCoreInstrInfo.td | 87 ++++++++++++----------------------- 2 files changed, 42 insertions(+), 59 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index fa5c0a2..0e0aab9 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -107,12 +107,22 @@ class _FLRU6 opc, dag outs, dag ins, string asmstr, list pattern> let Inst{9-0} = b{15-6}; } -class _FU6 pattern> +class _FU6 opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + bits<6> a; + + let Inst{15-6} = opc; + let Inst{5-0} = a; } -class _FLU6 pattern> +class _FLU6 opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<16> a; + + let Inst{31-22} = opc; + let Inst{21-16} = a{5-0}; + let Inst{15-10} = 0b111100; + let Inst{9-0} = a{15-6}; } class _FU10 pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 5ca2b0f..a3e16be 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -288,36 +288,23 @@ multiclass FRU6_LRU6_cp opc, string OpcStr> { } // U6 -multiclass FU6_LU6 { - def _u6: _FU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(OpNode immU6:$b)]>; - def _lu6: _FLU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(OpNode immU16:$b)]>; +multiclass FU6_LU6 opc, string OpcStr, SDNode OpNode> { + def _u6: _FU6; + def _lu6: _FLU6; } -multiclass FU6_LU6_int { - def _u6: _FU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(Int immU6:$b)]>; - def _lu6: _FLU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(Int immU16:$b)]>; + +multiclass FU6_LU6_int opc, string OpcStr, Intrinsic Int> { + def _u6: _FU6; + def _lu6: _FLU6; } -multiclass FU6_LU6_np { - def _u6: _FU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; - def _lu6: _FLU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; +multiclass FU6_LU6_np opc, string OpcStr> { + def _u6: _FU6; + def _lu6: _FLU6; } // U10 @@ -628,60 +615,46 @@ defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">; // U6 let Defs = [SP], Uses = [SP] in { let neverHasSideEffects = 1 in -defm EXTSP : FU6_LU6_np<"extsp">; +defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">; let mayStore = 1 in -defm ENTSP : FU6_LU6_np<"entsp">; +defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">; let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in { -defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; +defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>; } } // TODO extdp, kentsp, krestsp, blat // getsr, kalli let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def BRBU_u6 : _FU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; -def BRBU_lu6 : _FLU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; -def BRFU_u6 : _FU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; -def BRFU_lu6 : _FLU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; } //let Uses = [CP] in 
... let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWCP_u6: _FU6<(outs), (ins MEMii:$a), "ldaw r11, cp[$a]", []>; +def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", + []>; let Defs = [R11], isReMaterializable = 1 in -def LDAWCP_lu6: _FLU6<(outs), (ins MEMii:$a), "ldaw r11, cp[$a]", +def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", [(set R11, ADDRcpii:$a)]>; -defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>; +defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>; -defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>; +defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>; // setsr may cause a branch if it is used to enable events. clrsr may // branch if it is executed while events are enabled. -let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in { -defm SETSR_branch : FU6_LU6_np<"setsr">; -defm CLRSR_branch : FU6_LU6_np<"clrsr">; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, + isCodeGenOnly = 1 in { +defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">; +defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">; } // U10 -- cgit v1.1 From d5ab457c0e3878918502b2070f1d8dd90af55701 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Mon, 21 Jan 2013 20:50:54 +0000 Subject: Remove unused multiclass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173087 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index a3e16be..6288bb7 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -307,18 +307,6 @@ multiclass FU6_LU6_np opc, string OpcStr> { def _lu6: _FLU6; } -// U10 -multiclass FU10_LU10_np { - def _u10: _FU10< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; - def _lu10: _FLU10< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; -} - // Two operand short class F2R_np opc, string OpcStr> : -- cgit v1.1 From 923cc3ebb06768bee730789a6cb44b9e8c6576bf Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Mon, 21 Jan 2013 21:12:30 +0000 Subject: Fix some incorrectly named u10 / lu10 instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173090 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 6288bb7..3984140 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -649,37 +649,24 @@ defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">; // TODO ldwcpl, blacp let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in -def LDAP_u10 : _FU10< - (outs), - (ins i32imm:$addr), - "ldap r11, $addr", - []>; +def LDAPF_u10 : _FU10<(outs), (ins i32imm:$addr), "ldap r11, $addr", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAP_lu10 : _FLU10< - (outs), - (ins i32imm:$addr), - "ldap r11, $addr", - [(set R11, (pcrelwrapper tglobaladdr:$addr))]>; +def LDAPF_lu10 : _FLU10<(outs), (ins i32imm:$addr), "ldap r11, $addr", + [(set R11, (pcrelwrapper tglobaladdr:$addr))]>; let Defs = [R11], isReMaterializable = 1 in -def LDAP_lu10_ba : _FLU10<(outs), - (ins i32imm:$addr), - "ldap r11, $addr", - [(set R11, (pcrelwrapper tblockaddress:$addr))]>; +def LDAPF_lu10_ba : _FLU10<(outs), (ins i32imm:$addr), "ldap r11, $addr", + [(set R11, (pcrelwrapper tblockaddress:$addr))]>; let isCall=1, // All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { -def BL_u10 : _FU10< - (outs), (ins calltarget:$target), - "bl $target", - [(XCoreBranchLink immU10:$target)]>; - -def BL_lu10 : _FLU10< - (outs), (ins calltarget:$target), - "bl $target", - [(XCoreBranchLink immU20:$target)]>; +def BLRF_u10 : _FU10<(outs), (ins calltarget:$target), "bl $target", + [(XCoreBranchLink immU10:$target)]>; + +def BLRF_lu10 : _FLU10<(outs), (ins calltarget:$target), "bl $target", + [(XCoreBranchLink immU20:$target)]>; } // Two operand short @@ -979,8 +966,8 @@ def WAITEU_0R : _F0R<0b0000001100, (outs), (ins), // Non-Instruction Patterns //===----------------------------------------------------------------------===// -def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>; -def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>; +def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BLRF_lu10 tglobaladdr:$addr)>; +def : Pat<(XCoreBranchLink texternalsym:$addr), (BLRF_lu10 texternalsym:$addr)>; /// sext_inreg def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>; -- cgit v1.1 From 8e528100d210e225cee417229d94af91355118c0 Mon Sep 17 00:00:00 2001 From: Paul Redmond Date: Mon, 21 Jan 2013 21:57:20 +0000 Subject: Transform (sub 0, (zext bool to A)) to (sext bool to A) and (sub 0, (sext bool to A)) to (zext bool to A). 
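The identity behind this fold can be checked directly; a minimal C++ illustration (not part of the patch), exercising both directions for an i1-like boolean widened to 32 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (int b = 0; b <= 1; ++b) {
    uint32_t Z = (uint32_t)b;           // zext i1 %b to i32: 0 or 1
    uint32_t S = b ? 0xFFFFFFFFu : 0u;  // sext i1 %b to i32: 0 or -1
    assert(0u - Z == S);                // (sub 0, (zext b)) == (sext b)
    assert(0u - S == Z);                // (sub 0, (sext b)) == (zext b)
  }
  return 0;
}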
Patch by Muhammad Ahmad. Reviewed by Duncan Sands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173093 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 03be8ef..c6d60d6 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1250,6 +1250,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + + // Fold (sub 0, (zext bool to B)) --> (sext bool to B) + if (C->isZero() && match(Op1, m_ZExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateSExtOrBitCast(X, Op1->getType()); + + // Fold (sub 0, (sext bool to B)) --> (zext bool to B) + if (C->isZero() && match(Op1, m_SExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateZExtOrBitCast(X, Op1->getType()); } -- cgit v1.1 From c5f1bc88a2eb7ad9ff924ca90cf88494e5f947b9 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 21 Jan 2013 21:57:28 +0000 Subject: Make AttributeSet::getFnAttributes() return an AttributeSet instead of an Attribute. This is more code to isolate the use of the Attribute class to that of just holding one attribute instead of a collection of attributes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 2 ++ lib/IR/Attributes.cpp | 20 ++++++++++++++++++++ lib/Transforms/IPO/ArgumentPromotion.cpp | 6 ++++-- lib/Transforms/IPO/DeadArgumentElimination.cpp | 24 ++++++++++++------------ lib/Transforms/InstCombine/InstCombineCalls.cpp | 13 +++++++------ 5 files changed, 45 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 10f30e7..b5d292e 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -39,6 +39,8 @@ public: ArrayRef values); AttributeImpl(LLVMContext &C, StringRef data); + LLVMContext &getContext() { return Context; } + ArrayRef getValues() const { return Vals; } bool hasAttribute(Attribute::AttrKind A) const; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 5de1827..d3f284a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -530,9 +530,29 @@ void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, } //===----------------------------------------------------------------------===// +// AttributeWithIndex Definition +//===----------------------------------------------------------------------===// + +AttributeWithIndex AttributeWithIndex::get(LLVMContext &C, unsigned Idx, + AttributeSet AS) { + // FIXME: This is temporary, but necessary for the conversion. + AttrBuilder B(AS, Idx); + return get(Idx, Attribute::get(C, B)); +} + +//===----------------------------------------------------------------------===// // AttributeSetImpl Definition //===----------------------------------------------------------------------===// +AttributeSet AttributeSet::getFnAttributes() const { + // FIXME: Remove. + return AttrList ? + AttributeSet::get(AttrList->getContext(), + AttributeWithIndex::get(FunctionIndex, + getAttributes(FunctionIndex))) : + AttributeSet(); +} + AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { // If there are no attributes then return a null AttributesList pointer.
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 75c0504..c5b17db 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -591,7 +591,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(FTy->getContext(), + AttributeSet::FunctionIndex, PAL.getFnAttributes())); Type *RetTy = FTy->getReturnType(); @@ -719,7 +720,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, + AttributesVec.push_back(AttributeWithIndex::get(Call->getContext(), + AttributeSet::FunctionIndex, CallPAL.getFnAttributes())); Instruction *New; diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 5b5a015..f6486e1 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -276,10 +276,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { SmallVector AttributesVec; for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); - Attribute FnAttrs = PAL.getFnAttributes(); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, - FnAttrs)); + AttributesVec.push_back(AttributeWithIndex::get(Fn.getContext(), + AttributeSet::FunctionIndex, + PAL.getFnAttributes())); PAL = AttributeSet::get(Fn.getContext(), AttributesVec); } @@ -702,10 +702,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // The existing function return attributes. Attribute RAttrs = PAL.getRetAttributes(); - Attribute FnAttrs = PAL.getFnAttributes(); // Find out the new return value. - Type *RetTy = FTy->getReturnType(); Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); @@ -801,9 +799,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } } - if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, - FnAttrs)); + if (PAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), + AttributeSet::FunctionIndex, + PAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec); @@ -837,7 +836,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // The call return attributes. Attribute RAttrs = CallPAL.getRetAttributes(); - Attribute FnAttrs = CallPAL.getFnAttributes(); + // Adjust in case the function was changed to return void. RAttrs = Attribute::get(NF->getContext(), AttrBuilder(RAttrs). @@ -869,9 +868,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } - if (FnAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, - FnAttrs)); + if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeWithIndex::get(Call->getContext(), + AttributeSet::FunctionIndex, + CallPAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. 
AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 1ac9a9d..f3036d8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1175,16 +1175,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } } - Attribute FnAttrs = CallerPAL.getFnAttributes(); + AttributeSet FnAttrs = CallerPAL.getFnAttributes(); if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex)) - attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, + attrVec.push_back(AttributeWithIndex::get(Callee->getContext(), + AttributeSet::FunctionIndex, FnAttrs)); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(), - attrVec); + attrVec); Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { @@ -1319,10 +1320,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, } // Add any function attributes. - Attr = Attrs.getFnAttributes(); if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) - NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex, - Attr)); + NewAttrs.push_back(AttributeWithIndex::get(FTy->getContext(), + AttributeSet::FunctionIndex, + Attrs.getFnAttributes())); // The trampoline may have been bitcast to a bogus type (FTy). // Handle this by synthesizing a new function type, equal to FTy -- cgit v1.1 From 3fc4b96b503fa202411317684a2ba02e41e43072 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 21 Jan 2013 22:44:49 +0000 Subject: Have AttributeSet::getRetAttributes() return an AttributeSet instead of Attribute. This further restricts the use of the Attribute class to the Attribute family of classes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173098 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 35 ++++++++++++++++--------- lib/Transforms/IPO/ArgumentPromotion.cpp | 6 +++-- lib/Transforms/IPO/DeadArgumentElimination.cpp | 28 +++++++++++--------- lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 +++--- 4 files changed, 47 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d3f284a..5c95d4a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -544,9 +544,18 @@ AttributeWithIndex AttributeWithIndex::get(LLVMContext &C, unsigned Idx, // AttributeSetImpl Definition //===----------------------------------------------------------------------===// +AttributeSet AttributeSet::getRetAttributes() const { + // FIXME: Remove. + return AttrList && hasAttributes(ReturnIndex) ? + AttributeSet::get(AttrList->getContext(), + AttributeWithIndex::get(ReturnIndex, + getAttributes(ReturnIndex))) : + AttributeSet(); +} + AttributeSet AttributeSet::getFnAttributes() const { // FIXME: Remove. - return AttrList ? + return AttrList && hasAttributes(FunctionIndex) ? 
AttributeSet::get(AttrList->getContext(), AttributeWithIndex::get(FunctionIndex, getAttributes(FunctionIndex))) : @@ -588,20 +597,22 @@ AttributeSet AttributeSet::get(LLVMContext &C, } AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { - SmallVector Attrs; - for (AttrBuilder::iterator I = B.begin(), E = B.end(); I != E; ++I) { - Attribute::AttrKind Kind = *I; - Attribute A = Attribute::get(C, Kind); + // FIXME: This should be implemented as a loop that creates the + // AttributeWithIndexes that then are used to create the AttributeSet. + if (!B.hasAttributes()) + return AttributeSet(); - if (Kind == Attribute::Alignment) - A.setAlignment(B.getAlignment()); - else if (Kind == Attribute::StackAlignment) - A.setStackAlignment(B.getStackAlignment()); + uint64_t Mask = 0; - Attrs.push_back(AttributeWithIndex::get(Idx, A)); - } + for (AttrBuilder::iterator I = B.begin(), E = B.end(); I != E; ++I) + Mask |= AttributeImpl::getAttrMask(*I); - return get(C, Attrs); + Attribute A = Attribute::decodeLLVMAttributesForBitcode(C, Mask); + if (B.getAlignment()) + A.setAlignment(B.getAlignment()); + if (B.getStackAlignment()) + A.setStackAlignment(B.getStackAlignment()); + return get(C, AttributeWithIndex::get(Idx, A)); } //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index c5b17db..39062e6 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -519,7 +519,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any return attributes. if (PAL.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, + AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), + AttributeSet::ReturnIndex, PAL.getRetAttributes())); // First, determine the new argument list @@ -639,7 +640,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any return attributes. if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, + AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), + AttributeSet::ReturnIndex, CallPAL.getRetAttributes())); // Loop over the operands, inserting GEP and loads in the caller as diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index f6486e1..5204248 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -700,9 +700,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { SmallVector AttributesVec; const AttributeSet &PAL = F->getAttributes(); - // The existing function return attributes. - Attribute RAttrs = PAL.getRetAttributes(); - // Find out the new return value. Type *RetTy = FTy->getReturnType(); Type *NRetTy = NULL; @@ -757,21 +754,26 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { assert(NRetTy && "No new return type found?"); + // The existing function return attributes. + AttributeSet RAttrs = PAL.getRetAttributes(); + // Remove any incompatible attributes, but only if we removed all return // values. Otherwise, ensure that we don't have any conflicting attributes // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) RAttrs = - Attribute::get(NRetTy->getContext(), AttrBuilder(RAttrs). 
- removeAttributes(Attribute::typeIncompatible(NRetTy))); + AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex, + AttrBuilder(RAttrs, AttributeSet::ReturnIndex). + removeAttributes(Attribute::typeIncompatible(NRetTy))); else - assert(!AttrBuilder(RAttrs). + assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). hasAttributes(Attribute::typeIncompatible(NRetTy)) && "Return attributes no longer compatible?"); - if (RAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, + if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeWithIndex::get(NRetTy->getContext(), + AttributeSet::ReturnIndex, RAttrs)); // Remember which arguments are still alive. @@ -835,14 +837,16 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { const AttributeSet &CallPAL = CS.getAttributes(); // The call return attributes. - Attribute RAttrs = CallPAL.getRetAttributes(); + AttributeSet RAttrs = CallPAL.getRetAttributes(); // Adjust in case the function was changed to return void. RAttrs = - Attribute::get(NF->getContext(), AttrBuilder(RAttrs). + AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex, + AttrBuilder(RAttrs, AttributeSet::ReturnIndex). removeAttributes(Attribute::typeIncompatible(NF->getReturnType()))); - if (RAttrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, + if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeWithIndex::get(NF->getContext(), + AttributeSet::ReturnIndex, RAttrs)); // Declare these outside of the loops, so we can reuse them for the second diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index f3036d8..6d4f188 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1287,10 +1287,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // mean appending it. Likewise for attributes. // Add any result attributes. - Attribute Attr = Attrs.getRetAttributes(); if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) - NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex, - Attr)); + NewAttrs.push_back(AttributeWithIndex::get(Caller->getContext(), + AttributeSet::ReturnIndex, + Attrs.getRetAttributes())); { unsigned Idx = 1; @@ -1310,7 +1310,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original argument and attributes. NewArgs.push_back(*I); - Attr = Attrs.getParamAttributes(Idx); + Attribute Attr = Attrs.getParamAttributes(Idx); if (Attr.hasAttributes()) NewAttrs.push_back (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); -- cgit v1.1 From bed8059b320e273cd187b12cffb62fa147f010fe Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 21 Jan 2013 23:03:18 +0000 Subject: Use AttributeSet instead of Attribute to verify things. 
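For orientation, a small sketch of the index-based queries this series moves the verifier onto (illustrative only; it assumes the 2013-era API shown in the diffs, where parameters are numbered from 1 and ReturnIndex/FunctionIndex address the return value and the function itself):

#include "llvm/IR/Attributes.h"
using namespace llvm;

// Query one parameter's attributes through the whole-set interface
// instead of first extracting a single Attribute object.
bool paramIsByVal(const AttributeSet &Attrs, unsigned ArgNo) {
  return Attrs.hasAttribute(ArgNo, Attribute::ByVal); // ArgNo is 1-based
}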
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173101 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 105 ++++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 53 deletions(-) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 07176fe..ee20f53 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -301,7 +301,7 @@ namespace { bool VerifyIntrinsicType(Type *Ty, ArrayRef &Infos, SmallVectorImpl &ArgTys); - void VerifyParameterAttrs(Attribute Attrs, Type *Ty, + void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, bool isReturnValue, const Value *V); void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs, const Value *V); @@ -628,81 +628,81 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap&SeenIDs, // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. -void Verifier::VerifyParameterAttrs(Attribute Attrs, Type *Ty, +void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, bool isReturnValue, const Value *V) { - if (!Attrs.hasAttributes()) + if (!Attrs.hasAttributes(Idx)) return; - Assert1(!Attrs.hasAttribute(Attribute::NoReturn) && - !Attrs.hasAttribute(Attribute::NoUnwind) && - !Attrs.hasAttribute(Attribute::ReadNone) && - !Attrs.hasAttribute(Attribute::ReadOnly) && - !Attrs.hasAttribute(Attribute::NoInline) && - !Attrs.hasAttribute(Attribute::AlwaysInline) && - !Attrs.hasAttribute(Attribute::OptimizeForSize) && - !Attrs.hasAttribute(Attribute::StackProtect) && - !Attrs.hasAttribute(Attribute::StackProtectReq) && - !Attrs.hasAttribute(Attribute::NoRedZone) && - !Attrs.hasAttribute(Attribute::NoImplicitFloat) && - !Attrs.hasAttribute(Attribute::Naked) && - !Attrs.hasAttribute(Attribute::InlineHint) && - !Attrs.hasAttribute(Attribute::StackAlignment) && - !Attrs.hasAttribute(Attribute::UWTable) && - !Attrs.hasAttribute(Attribute::NonLazyBind) && - !Attrs.hasAttribute(Attribute::ReturnsTwice) && - !Attrs.hasAttribute(Attribute::AddressSafety) && - !Attrs.hasAttribute(Attribute::MinSize), - "Some attributes in '" + Attrs.getAsString() + + Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) && + !Attrs.hasAttribute(Idx, Attribute::NoUnwind) && + !Attrs.hasAttribute(Idx, Attribute::ReadNone) && + !Attrs.hasAttribute(Idx, Attribute::ReadOnly) && + !Attrs.hasAttribute(Idx, Attribute::NoInline) && + !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) && + !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) && + !Attrs.hasAttribute(Idx, Attribute::StackProtect) && + !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) && + !Attrs.hasAttribute(Idx, Attribute::NoRedZone) && + !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) && + !Attrs.hasAttribute(Idx, Attribute::Naked) && + !Attrs.hasAttribute(Idx, Attribute::InlineHint) && + !Attrs.hasAttribute(Idx, Attribute::StackAlignment) && + !Attrs.hasAttribute(Idx, Attribute::UWTable) && + !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) && + !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) && + !Attrs.hasAttribute(Idx, Attribute::AddressSafety) && + !Attrs.hasAttribute(Idx, Attribute::MinSize), + "Some attributes in '" + Attrs.getAsString(Idx) + "' only apply to functions!", V); if (isReturnValue) - Assert1(!Attrs.hasAttribute(Attribute::ByVal) && - !Attrs.hasAttribute(Attribute::Nest) && - !Attrs.hasAttribute(Attribute::StructRet) && - !Attrs.hasAttribute(Attribute::NoCapture), + 
Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) && + !Attrs.hasAttribute(Idx, Attribute::Nest) && + !Attrs.hasAttribute(Idx, Attribute::StructRet) && + !Attrs.hasAttribute(Idx, Attribute::NoCapture), "Attribute 'byval', 'nest', 'sret', and 'nocapture' " "do not apply to return values!", V); // Check for mutually incompatible attributes. - Assert1(!((Attrs.hasAttribute(Attribute::ByVal) && - Attrs.hasAttribute(Attribute::Nest)) || - (Attrs.hasAttribute(Attribute::ByVal) && - Attrs.hasAttribute(Attribute::StructRet)) || - (Attrs.hasAttribute(Attribute::Nest) && - Attrs.hasAttribute(Attribute::StructRet))), "Attributes " + Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) && + Attrs.hasAttribute(Idx, Attribute::Nest)) || + (Attrs.hasAttribute(Idx, Attribute::ByVal) && + Attrs.hasAttribute(Idx, Attribute::StructRet)) || + (Attrs.hasAttribute(Idx, Attribute::Nest) && + Attrs.hasAttribute(Idx, Attribute::StructRet))), "Attributes " "'byval, nest, and sret' are incompatible!", V); - Assert1(!((Attrs.hasAttribute(Attribute::ByVal) && - Attrs.hasAttribute(Attribute::Nest)) || - (Attrs.hasAttribute(Attribute::ByVal) && - Attrs.hasAttribute(Attribute::InReg)) || - (Attrs.hasAttribute(Attribute::Nest) && - Attrs.hasAttribute(Attribute::InReg))), "Attributes " + Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) && + Attrs.hasAttribute(Idx, Attribute::Nest)) || + (Attrs.hasAttribute(Idx, Attribute::ByVal) && + Attrs.hasAttribute(Idx, Attribute::InReg)) || + (Attrs.hasAttribute(Idx, Attribute::Nest) && + Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes " "'byval, nest, and inreg' are incompatible!", V); - Assert1(!(Attrs.hasAttribute(Attribute::ZExt) && - Attrs.hasAttribute(Attribute::SExt)), "Attributes " + Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) && + Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes " "'zeroext and signext' are incompatible!", V); - Assert1(!(Attrs.hasAttribute(Attribute::ReadNone) && - Attrs.hasAttribute(Attribute::ReadOnly)), "Attributes " + Assert1(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) && + Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes " "'readnone and readonly' are incompatible!", V); - Assert1(!(Attrs.hasAttribute(Attribute::NoInline) && - Attrs.hasAttribute(Attribute::AlwaysInline)), "Attributes " + Assert1(!(Attrs.hasAttribute(Idx, Attribute::NoInline) && + Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), "Attributes " "'noinline and alwaysinline' are incompatible!", V); - Assert1(!AttrBuilder(Attrs). + Assert1(!AttrBuilder(Attrs, Idx). hasAttributes(Attribute::typeIncompatible(Ty)), "Wrong types for attribute: " + Attribute::typeIncompatible(Ty).getAsString(), V); if (PointerType *PTy = dyn_cast(Ty)) - Assert1(!Attrs.hasAttribute(Attribute::ByVal) || + Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) || PTy->getElementType()->isSized(), "Attribute 'byval' does not support unsized types!", V); else - Assert1(!Attrs.hasAttribute(Attribute::ByVal), + Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal), "Attribute 'byval' only applies to parameters with pointer type!", V); } @@ -728,7 +728,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, else break; // VarArgs attributes, verified elsewhere. 
- VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V); + VerifyParameterAttrs(Attrs, Attr.Index, Ty, Attr.Index == 0, V); if (Attr.Attrs.hasAttribute(Attribute::Nest)) { Assert1(!SawNest, "More than one parameter has attribute nest!", V); @@ -1356,11 +1356,10 @@ void Verifier::VerifyCallSite(CallSite CS) { if (FTy->isVarArg()) // Check attributes on the varargs part. for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { - Attribute Attr = Attrs.getParamAttributes(Idx); + VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(), + false, I); - VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I); - - Assert1(!Attr.hasAttribute(Attribute::StructRet), + Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet), "Attribute 'sret' cannot be used for vararg call arguments!", I); } -- cgit v1.1 From da6d5623aefeb3c77a610954c77605107e1532b8 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 22 Jan 2013 00:41:33 +0000 Subject: Add the attributes that are passed in instead of the ones we're merging into. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173108 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 5c95d4a..fe6366d 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -693,12 +693,12 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { AttributeSet AttributeSet::addRetAttributes(LLVMContext &C, AttributeSet Attrs) const { - return addAttr(C, ReturnIndex, getAttributes(ReturnIndex)); + return addAttr(C, ReturnIndex, Attrs.getAttributes(ReturnIndex)); } AttributeSet AttributeSet::addFnAttributes(LLVMContext &C, AttributeSet Attrs) const { - return addAttr(C, FunctionIndex, getAttributes(FunctionIndex)); + return addAttr(C, FunctionIndex, Attrs.getAttributes(FunctionIndex)); } AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, -- cgit v1.1 From e4e85f17564c28cd571dda30146c3f310521acf0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 22 Jan 2013 00:53:12 +0000 Subject: Add a new method that adds the AttributeSet at the given index. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173109 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index fe6366d..420b2e8 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -691,14 +691,9 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { return false; } -AttributeSet AttributeSet::addRetAttributes(LLVMContext &C, - AttributeSet Attrs) const { - return addAttr(C, ReturnIndex, Attrs.getAttributes(ReturnIndex)); -} - -AttributeSet AttributeSet::addFnAttributes(LLVMContext &C, - AttributeSet Attrs) const { - return addAttr(C, FunctionIndex, Attrs.getAttributes(FunctionIndex)); +AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, + AttributeSet Attrs) const { + return addAttr(C, Idx, Attrs.getAttributes(Idx)); } AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, -- cgit v1.1 From 849209686f778e5d6fce675bea9a8300aa596d25 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 22 Jan 2013 03:42:49 +0000 Subject: [MC/Mach-O] Load commands are supposed to be 8-byte aligned on 64-bit.
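The size computation in the patch, restated as a small self-contained sketch; the 16-byte fixed header below is an assumption standing in for sizeof(macho::LinkerOptionsLoadCommand), and the final rounding is what changes from 4 to the pointer size on 64-bit:

#include <cstdint>
#include <string>
#include <vector>

uint64_t linkerOptionsCmdSize(const std::vector<std::string> &Options,
                              bool Is64Bit) {
  uint64_t Size = 16; // assumed fixed header size (cmd, cmdsize, count, ...)
  for (size_t i = 0, e = Options.size(); i != e; ++i)
    Size += Options[i].size() + 1;          // each string plus its null byte
  uint64_t Align = Is64Bit ? 8 : 4;         // load commands: pointer-size aligned
  return (Size + Align - 1) & ~(Align - 1); // RoundUpToAlignment
}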
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173120 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MachObjectWriter.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index d13c178..a5ba3c3 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -377,25 +377,25 @@ void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, } static unsigned ComputeLinkerOptionsLoadCommandSize( - const std::vector &Options) + const std::vector &Options, bool is64Bit) { unsigned Size = sizeof(macho::LinkerOptionsLoadCommand); for (unsigned i = 0, e = Options.size(); i != e; ++i) Size += Options[i].size() + 1; - return RoundUpToAlignment(Size, 4); + return RoundUpToAlignment(Size, is64Bit ? 8 : 4); } void MachObjectWriter::WriteLinkerOptionsLoadCommand( const std::vector &Options) { - unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options); + unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); uint64_t Start = OS.tell(); (void) Start; Write32(macho::LCT_LinkerOptions); Write32(Size); Write32(Options.size()); - uint64_t BytesWritten = 0; + uint64_t BytesWritten = sizeof(macho::LinkerOptionsLoadCommand); for (unsigned i = 0, e = Options.size(); i != e; ++i) { // Write each string, including the null byte. const std::string &Option = Options[i]; @@ -403,8 +403,8 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand( BytesWritten += Option.size() + 1; } - // Pad to a multiple of 4. - WriteBytes("", OffsetToAlignment(BytesWritten, 4)); + // Pad to a multiple of the pointer size. + WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); assert(OS.tell() - Start == Size); } @@ -747,7 +747,8 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, Asm.getLinkerOptions(); for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { ++NumLoadCommands; - LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i]); + LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i], + is64Bit()); } // Compute the total size of the section data, as well as its file size and vm -- cgit v1.1 From 0a29cb045444c13160e90fe7942a9d7c720185ed Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 22 Jan 2013 09:46:31 +0000 Subject: Make APFloat constructor require explicit semantics. Previously we tried to infer it from the bit width, with an added IsIEEE argument for the PPC/IEEE 128-bit case, which had a default value. This default value allowed bugs to creep in where it was inappropriate.
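A minimal usage sketch of the new form, mirroring the call sites updated in the diffs below; the ambiguity it removes is exactly the 128-bit case:

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

APFloat makeQuad(const APInt &Bits) {    // IEEE binary128
  return APFloat(APFloat::IEEEquad, Bits);
}
APFloat makePPC128(const APInt &Bits) {  // PowerPC double-double
  return APFloat(APFloat::PPCDoubleDouble, Bits);
  // Before this change, APFloat(APInt(128, ...)) had to pick between
  // these two formats via the extra IsIEEE flag's default value.
}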
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173138 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 2 +- lib/AsmParser/LLLexer.cpp | 9 +++-- lib/Bitcode/Reader/BitcodeReader.cpp | 18 ++++++--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 20 ++++++---- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 15 +------ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 30 +++++--------- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +- lib/ExecutionEngine/ExecutionEngine.cpp | 19 +++++---- lib/ExecutionEngine/JIT/JIT.cpp | 3 +- lib/IR/ConstantFold.cpp | 8 ++-- lib/IR/Constants.cpp | 3 +- lib/Support/APFloat.cpp | 48 +++++++++++++++-------- lib/Target/X86/X86ISelLowering.cpp | 48 ++++++++++++++--------- 14 files changed, 129 insertions(+), 100 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 2b7d3bd..e2b1e25 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1337,7 +1337,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); case Intrinsic::convert_from_fp16: { - APFloat Val(Op->getValue()); + APFloat Val(APFloat::IEEEhalf, Op->getValue()); bool lost = false; APFloat::opStatus status = diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 925f9bc..2efa1f0 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -713,20 +713,21 @@ lltok::Kind LLLexer::Lex0x() { case 'K': // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) FP80HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(80, Pair)); + APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair)); return lltok::APFloat; case 'L': // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(128, Pair), true); + APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair)); return lltok::APFloat; case 'M': // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(128, Pair)); + APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair)); return lltok::APFloat; case 'H': - APFloatVal = APFloat(APInt(16,HexIntToVal(TokStart+3, CurPtr))); + APFloatVal = APFloat(APFloat::IEEEhalf, + APInt(16,HexIntToVal(TokStart+3, CurPtr))); return lltok::APFloat; } } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 00474ec..f09b93b 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -986,21 +986,27 @@ bool BitcodeReader::ParseConstants() { if (Record.empty()) return Error("Invalid FLOAT record"); if (CurTy->isHalfTy()) - V = ConstantFP::get(Context, APFloat(APInt(16, (uint16_t)Record[0]))); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf, + APInt(16, (uint16_t)Record[0]))); else if (CurTy->isFloatTy()) - V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0]))); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle, + APInt(32, (uint32_t)Record[0]))); else if (CurTy->isDoubleTy()) - V = ConstantFP::get(Context, APFloat(APInt(64, Record[0]))); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble, + APInt(64, Record[0]))); else if (CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. 
uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; - V = ConstantFP::get(Context, APFloat(APInt(80, Rearrange))); + V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended, + APInt(80, Rearrange))); } else if (CurTy->isFP128Ty()) - V = ConstantFP::get(Context, APFloat(APInt(128, Record), true)); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad, + APInt(128, Record))); else if (CurTy->isPPC_FP128Ty()) - V = ConstantFP::get(Context, APFloat(APInt(128, Record))); + V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble, + APInt(128, Record))); else V = UndefValue::get(CurTy); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index db3abaf..d37edab 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2805,7 +2805,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(DAG.EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4859ad0..1ee2192 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -813,9 +813,11 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast(N)->getValueAPF().bitcastToAPInt(); - Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])), + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[1])), NVT); - Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])), + Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[0])), NVT); } @@ -987,7 +989,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, @@ -1082,7 +1085,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Chain = Hi.getValue(1); // The low part is zero. - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); // Modified the chain - switch anything that used the old chain to use the // new one. @@ -1106,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, // The integer can be represented exactly in an f64. Src = DAG.getNode(isSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1152,7 +1157,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APInt(128, Parts)), + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + APInt(128, Parts)), MVT::ppcf128)); Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), Lo, Hi, DAG.getCondCode(ISD::SETLT)); @@ -1304,7 +1310,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APInt(128, TwoE31)); + APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 18748f5..5da6d29 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2767,17 +2767,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); @@ -2787,8 +2776,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DstVT. Check that the mantissa // size of DstVT is >= than the number of bits in SrcVT -1. - const fltSemantics *sem = EVTToAPFloatSemantics(DstVT); - if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 && + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); + if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ // Do a signed conversion then adjust the result. 
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 344d144..b672926 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -60,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { return Res; } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f16: return &APFloat::IEEEhalf; - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} @@ -95,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -1081,7 +1070,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); - apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); } else @@ -2442,7 +2431,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); @@ -2450,9 +2440,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2499,7 +2489,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -3084,7 +3074,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. 
- (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -3338,7 +3328,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); if (VT.isInteger()) return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(Val), VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); } Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index cf74860..393a6be 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3694,7 +3694,8 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), + MVT::f32); } /// expandExp - Lower an exp intrinsic. Handles the special sequences for diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 6f71ffb..3d59d25 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -632,7 +632,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { else if (Op0->getType()->isDoubleTy()) GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth); else if (Op0->getType()->isX86_FP80Ty()) { - APFloat apf = APFloat(GV.IntVal); + APFloat apf = APFloat(APFloat::x87DoubleExtended, GV.IntVal); uint64_t v; bool ignored; (void)apf.convertToInteger(&v, BitWidth, @@ -751,27 +751,32 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Type::X86_FP80TyID: case Type::PPC_FP128TyID: case Type::FP128TyID: { - APFloat apfLHS = APFloat(LHS.IntVal); + const fltSemantics &Sem = CE->getOperand(0)->getType()->getFltSemantics(); + APFloat apfLHS = APFloat(Sem, LHS.IntVal); switch (CE->getOpcode()) { default: llvm_unreachable("Invalid long double opcode"); case Instruction::FAdd: - apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.add(APFloat(Sem, RHS.IntVal), APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FSub: - apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.subtract(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FMul: - apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.multiply(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FDiv: - apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.divide(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FRem: - apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.mod(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 103c0c0..53ea0a2 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -522,7 +522,8 @@ GenericValue 
JIT::runFunction(Function *F, case Type::PPC_FP128TyID: case Type::X86_FP80TyID: case Type::FP128TyID: - C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal)); + C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(), + AV.IntVal)); break; case Type::PointerTyID: void *ArgPtr = GVTOP(AV); diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 0ffb24e..417e0d1 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -168,8 +168,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { if (DestTy->isFloatingPointTy()) return ConstantFP::get(DestTy->getContext(), - APFloat(CI->getValue(), - !DestTy->isPPC_FP128Ty())); + APFloat(DestTy->getFltSemantics(), + CI->getValue())); // Otherwise, can't fold this (vector?) return 0; @@ -647,8 +647,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, case Instruction::SIToFP: if (ConstantInt *CI = dyn_cast(V)) { APInt api = CI->getValue(); - APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), - !DestTy->isPPC_FP128Ty() /* isEEEE */); + APFloat apf(DestTy->getFltSemantics(), + APInt::getNullValue(DestTy->getPrimitiveSizeInBits())); (void)apf.convertFromAPInt(api, opc==Instruction::SIToFP, APFloat::rmNearestTiesToEven); diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index e984aac..8093a09 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -119,7 +119,8 @@ Constant *Constant::getNullValue(Type *Ty) { APFloat::getZero(APFloat::IEEEquad)); case Type::PPC_FP128TyID: return ConstantFP::get(Ty->getContext(), - APFloat(APInt::getNullValue(128))); + APFloat(APFloat::PPCDoubleDouble, + APInt::getNullValue(128))); case Type::PointerTyID: return ConstantPointerNull::get(cast(Ty)); case Type::StructTyID: diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 2ac86a2..686f91b 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3013,7 +3013,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) // Unless we have a special case, add in second double. if (category == fcNormal) { - APFloat v(APInt(64, i2)); + APFloat v(IEEEdouble, APInt(64, i2)); fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; @@ -3166,27 +3166,43 @@ APFloat::initFromHalfAPInt(const APInt & api) /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful /// when the size is anything else). void -APFloat::initFromAPInt(const APInt& api, bool isIEEE) +APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) { - if (api.getBitWidth() == 16) + if (Sem == &IEEEhalf) return initFromHalfAPInt(api); - else if (api.getBitWidth() == 32) + if (Sem == &IEEEsingle) return initFromFloatAPInt(api); - else if (api.getBitWidth()==64) + if (Sem == &IEEEdouble) return initFromDoubleAPInt(api); - else if (api.getBitWidth()==80) + if (Sem == &x87DoubleExtended) return initFromF80LongDoubleAPInt(api); - else if (api.getBitWidth()==128) - return (isIEEE ? 
- initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api)); - else - llvm_unreachable(0); + if (Sem == &IEEEquad) + return initFromQuadrupleAPInt(api); + if (Sem == &PPCDoubleDouble) + return initFromPPCDoubleDoubleAPInt(api); + + llvm_unreachable(0); } APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { - return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE); + switch (BitWidth) { + case 16: + return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); + case 32: + return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); + case 64: + return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); + case 80: + return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); + case 128: + if (isIEEE) + return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); + return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); + default: + llvm_unreachable("Unknown floating bit width"); + } } APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { @@ -3244,16 +3260,16 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { return Val; } -APFloat::APFloat(const APInt& api, bool isIEEE) { - initFromAPInt(api, isIEEE); +APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { + initFromAPInt(&Sem, API); } APFloat::APFloat(float f) { - initFromAPInt(APInt::floatToBits(f)); + initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); } APFloat::APFloat(double d) { - initFromAPInt(APInt::doubleToBits(d)); + initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); } namespace { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ea514c6..b740897 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8010,9 +8010,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SmallVector CV1; CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 0x4330000000000000ULL)))); CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); @@ -8565,9 +8567,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { } Constant *C; if (EltVT == MVT::f64) - C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, ~(1ULL << 63)))); else - C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, + APInt(32, ~(1U << 31)))); C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); @@ -8597,9 +8601,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { } Constant *C; if (EltVT == MVT::f64) - C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 1ULL << 63))); else - C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, + APInt(32, 1U << 31))); C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); @@ -8643,13 +8649,15 @@ 
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // First get the sign bit of second operand. SmallVector CV; if (SrcVT == MVT::f64) { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); + const fltSemantics &Sem = APFloat::IEEEdouble; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + const fltSemantics &Sem = APFloat::IEEEsingle; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8672,13 +8680,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Clear first operand sign bit. CV.clear(); if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); + const fltSemantics &Sem = APFloat::IEEEdouble; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, + APInt(64, ~(1ULL << 63))))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + const fltSemantics &Sem = APFloat::IEEEsingle; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, + APInt(32, ~(1U << 31))))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); -- cgit v1.1 From 13086a658ae06046ded902229f9918b8bad505bd Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 22 Jan 2013 11:26:02 +0000 Subject: Begin fleshing out an interface in TTI for modelling the costs of generic function calls and intrinsics. This is somewhat overlapping with an existing intrinsic cost method, but that one seems targeted at vector intrinsics. I'll merge them or separate their names and use cases in a separate commit. This sinks the test of 'callIsSmall' down into TTI where targets can control it. The whole thing feels very hack-ish to me though. I've left a FIXME comment about the fundamental design problem this presents. It isn't yet clear to me what the users of this function *really* care about. I'll have to do more analysis to figure that out. Putting this here at least provides it access to proper analysis pass tools and other such. It also allows us to more cleanly implement the baseline cost interfaces in TTI.
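As a rough sketch of the intended use (illustrative only, not code from this patch), a client pass that has declared the TargetTransformInfo analysis as a dependency might query the new hooks along these lines:

    // Estimate the size cost of a call site CS with the new TTI hooks.
    // CS is assumed to be an ImmutableCallSite the pass is inspecting.
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    unsigned Cost = TargetTransformInfo::TCC_Basic;
    if (const Function *F = CS.getCalledFunction()) {
      if (TTI.isLoweredToCall(F))
        // A real call: the baseline charges roughly one instruction per
        // argument plus the call itself, i.e. TCC_Basic * (NumArgs + 1).
        Cost = TTI.getCallCost(F, CS.arg_size());
      // Otherwise the callee is expected to fold to a single DAG node
      // (fabs, sqrt, ...) and the basic cost already covers it.
    }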
With this commit, it is now theoretically possible to simplify much of the inline cost analysis's handling of calls by calling through to this interface. That conversion will have to happen in subsequent commits as it requires more extensive restructuring of the inline cost analysis. The CodeMetrics class is now really only in the business of running over a block of code and aggregating the metrics on that block of code, with the actual cost evaluation done entirely in terms of TTI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/CodeMetrics.cpp | 48 +----- lib/Analysis/IPA/InlineCost.cpp | 2 +- lib/Analysis/TargetTransformInfo.cpp | 172 ++++++++++++++++++--- lib/Transforms/Scalar/TailRecursionElimination.cpp | 20 ++- 4 files changed, 176 insertions(+), 66 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 073234b..8cda01a 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -20,41 +20,6 @@ using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(ImmutableCallSite CS) { - if (isa(CS.getInstruction())) - return true; - - const Function *F = CS.getCalledFunction(); - if (!F) return false; - - if (F->hasLocalLinkage()) return false; - - if (!F->hasName()) return false; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; - - return false; -} - /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, @@ -63,9 +28,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&*II)) - continue; - // Special handling for calls. if (isa(II) || isa(II)) { ImmutableCallSite CS(cast(II)); @@ -83,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // for that case. if (F == BB->getParent()) isRecursive = true; - } - - if (!callIsSmall(CS)) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); + if (TTI.isLoweredToCall(F)) + ++NumCalls; + } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. 
if (!isa(CS.getCalledValue())) @@ -112,7 +72,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (InvI->hasFnAttr(Attribute::NoDuplicate)) notDuplicatable = true; - ++NumInsts; + NumInsts += TTI.getUserCost(&*II); } if (isa(BB->getTerminator())) diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index cd211c4..3292e00 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -736,7 +736,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return false; } - if (!callIsSmall(CS)) { + if (TTI.isLoweredToCall(F)) { // We account for the average 1 instruction per call argument setup // here. Cost += CS.arg_size() * InlineConstants::InstrCost; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 99720d4..9fc21fd 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -58,10 +59,39 @@ unsigned TargetTransformInfo::getGEPCost( return PrevTTI->getGEPCost(Ptr, Operands); } +unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, + int NumArgs) const { + return PrevTTI->getCallCost(FTy, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost(const Function *F, + int NumArgs) const { + return PrevTTI->getCallCost(F, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost( + const Function *F, ArrayRef Arguments) const { + return PrevTTI->getCallCost(F, Arguments); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef ParamTys) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments); +} + unsigned TargetTransformInfo::getUserCost(const User *U) const { return PrevTTI->getUserCost(U); } +bool TargetTransformInfo::isLoweredToCall(const Function *F) const { + return PrevTTI->isLoweredToCall(F); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return PrevTTI->isLegalAddImmediate(Imm); } @@ -179,6 +209,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { virtual void initializePass() { // Note that this subclass is special, and must *not* call initializeTTI as // it does not chain. + TopTTI = this; PrevTTI = 0; DL = getAnalysisIfAvailable(); } @@ -257,6 +288,84 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return TCC_Free; } + unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const { + assert(FTy && "FunctionType must be provided to this routine."); + + // The target-independent implementation just measures the size of the + // function by approximating that each argument will take on average one + // instruction to prepare. + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = FTy->getNumParams(); + + return TCC_Basic * (NumArgs + 1); + } + + unsigned getCallCost(const Function *F, int NumArgs = -1) const { + assert(F && "A concrete function must be provided to this routine."); + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. 
+ NumArgs = F->arg_size(); + + if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { + FunctionType *FTy = F->getFunctionType(); + SmallVector ParamTys(FTy->param_begin(), FTy->param_end()); + return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); + } + + if (!TopTTI->isLoweredToCall(F)) + return TCC_Basic; // Give a basic cost if it will be lowered directly. + + return TopTTI->getCallCost(F->getFunctionType(), NumArgs); + } + + unsigned getCallCost(const Function *F, + ArrayRef Arguments) const { + // Simply delegate to generic handling of the call. + // FIXME: We should use instsimplify or something else to catch calls which + // will constant fold with these arguments. + return TopTTI->getCallCost(F, Arguments.size()); + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) const { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + // FIXME: This is wrong for libc intrinsics. + return TCC_Basic; + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't actually represent code after lowering. + return TCC_Free; + } + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Arguments) const { + // Delegate to the generic intrinsic handling code. This mostly provides an + // opportunity for targets to (for example) special case the cost of + // certain intrinsics based on constants used as arguments. + SmallVector ParamTys; + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); + return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); + } + unsigned getUserCost(const User *U) const { if (isa(U)) return TCC_Free; // Model all PHI nodes as free. @@ -266,25 +375,21 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { // folded into their uses via addressing modes. return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic; - // If we have a call of an intrinsic we can provide more detailed analysis - // by inspecting the particular intrinsic called. - // FIXME: Hoist this out into a getIntrinsicCost routine. - if (const IntrinsicInst *II = dyn_cast(U)) { - switch (II->getIntrinsicID()) { - default: - return TCC_Basic; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - return TCC_Free; + if (ImmutableCallSite CS = U) { + const Function *F = CS.getCalledFunction(); + if (!F) { + // Just use the called value type. 
+ Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); + return TopTTI->getCallCost(cast(FTy), CS.arg_size()); } + + SmallVector Arguments; + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), + AE = CS.arg_end(); + AI != AE; ++AI) + Arguments.push_back(*AI); + + return TopTTI->getCallCost(F, Arguments); } if (const CastInst *CI = dyn_cast(U)) { @@ -301,6 +406,37 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { U->getOperand(0)->getType() : 0); } + bool isLoweredToCall(const Function *F) const { + // FIXME: These should almost certainly not be handled here, and instead + // handled with the help of TLI or the target itself. This was largely + // ported from existing analysis heuristics here so that such refactorings + // can take place in the future. + + if (F->isIntrinsic()) + return false; + + if (F->hasLocalLinkage() || !F->hasName()) + return true; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || + Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + return false; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || + Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == + "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || + Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") + return false; + + return true; + } + bool isLegalAddImmediate(int64_t Imm) const { return false; } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6572e09..2002e68 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -58,6 +58,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -79,11 +80,15 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { + const TargetTransformInfo *TTI; + static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { initializeTailCallElimPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); private: @@ -109,14 +114,21 @@ namespace { } char TailCallElim::ID = 0; -INITIALIZE_PASS(TailCallElim, "tailcallelim", - "Tail Call Elimination", false, false) +INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } +void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); +} + /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by /// callees of this function. 
We only do very simple analysis right now, this /// could be expanded in the future to use mod/ref information for particular @@ -151,6 +163,7 @@ bool TailCallElim::runOnFunction(Function &F) { // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; + TTI = &getAnalysis(); BasicBlock *OldEntry = 0; bool TailCallsAreMarkedTail = false; SmallVector ArgumentPHIs; @@ -391,7 +404,8 @@ TailCallElim::FindTRECandidate(Instruction *TI, if (BB == &F->getEntryBlock() && FirstNonDbg(BB->front()) == CI && FirstNonDbg(llvm::next(BB->begin())) == TI && - callIsSmall(CI)) { + CI->getCalledFunction() && + !TTI->isLoweredToCall(CI->getCalledFunction())) { // A single-block function with just a call and a return. Check that // the arguments match. CallSite::arg_iterator I = CallSite(CI).arg_begin(), -- cgit v1.1 From 4247b13252a99b284e1ed396be9755878d42df2a Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 22 Jan 2013 12:30:52 +0000 Subject: [msan] Do not insert check on volatile store. Volatile bitfields can cause valid stores of uninitialized bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173153 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 618a6f0..8bb8115 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -451,9 +451,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - // If the store is volatile, add a check. - if (I.isVolatile()) - insertCheck(Val, &I); + if (ClCheckAccessAddress) insertCheck(Addr, &I); @@ -847,7 +845,6 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// Stores the corresponding shadow and (optionally) origin. /// Optionally, checks that the store address is fully defined. - /// Volatile stores check that the value being stored is fully defined. void visitStoreInst(StoreInst &I) { StoreList.push_back(&I); } -- cgit v1.1 From be0008a4df72bf9da3246707cdec2766ace75d32 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 22 Jan 2013 13:26:53 +0000 Subject: [msan] Export the value of msan-keep-going flag for the runtime. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 8bb8115..20b6de2 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -361,6 +361,9 @@ bool MemorySanitizer::doInitialization(Module &M) { new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, IRB.getInt32(TrackOrigins), "__msan_track_origins"); + new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, + IRB.getInt32(ClKeepGoing), "__msan_keep_going"); + return true; } -- cgit v1.1 From 9dd2a3b1f2c253e20262535bb89b1ab6cc680ece Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 22 Jan 2013 18:02:49 +0000 Subject: Initial patch for x32 ABI support. 
Add the x32 environment kind to the triple, and separate the concept of pointer size and callee save stack slot size, since they're not equal on x32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAsmInfo.cpp | 2 ++ lib/MC/MCDwarf.cpp | 2 +- lib/Support/Triple.cpp | 2 ++ lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 14 +++++++++++--- 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index a6fa658..de1095b 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -24,6 +24,8 @@ using namespace llvm; MCAsmInfo::MCAsmInfo() { PointerSize = 4; + CalleeSaveStackSlotSize = 0; // 0 means PointerSize is used in getter. + IsLittleEndian = true; StackGrowsUp = false; HasSubsectionsViaSymbols = false; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 74851ce..3cf47bc 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -792,7 +792,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, static int getDataAlignmentFactor(MCStreamer &streamer) { MCContext &context = streamer.getContext(); const MCAsmInfo &asmInfo = context.getAsmInfo(); - int size = asmInfo.getPointerSize(); + int size = asmInfo.getCalleeSaveStackSlotSize(); if (asmInfo.isStackGrowthDirectionUp()) return size; else diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index eefb96b..ad7b189 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -140,6 +140,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { case GNU: return "gnu"; case GNUEABIHF: return "gnueabihf"; case GNUEABI: return "gnueabi"; + case GNUX32: return "gnux32"; case EABI: return "eabi"; case MachO: return "macho"; case Android: return "android"; @@ -284,6 +285,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("eabi", Triple::EABI) .StartsWith("gnueabihf", Triple::GNUEABIHF) .StartsWith("gnueabi", Triple::GNUEABI) + .StartsWith("gnux32", Triple::GNUX32) .StartsWith("gnu", Triple::GNU) .StartsWith("macho", Triple::MachO) .StartsWith("android", Triple::Android) diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 16488eb..7815ae9 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -44,7 +44,7 @@ void X86MCAsmInfoDarwin::anchor() { } X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { bool is64Bit = T.getArch() == Triple::x86_64; if (is64Bit) - PointerSize = 8; + PointerSize = CalleeSaveStackSlotSize = 8; AssemblerDialect = AsmWriterFlavor; @@ -76,8 +76,16 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) void X86ELFMCAsmInfo::anchor() { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { - if (T.getArch() == Triple::x86_64) - PointerSize = 8; + bool is64Bit = T.getArch() == Triple::x86_64; + bool isX32 = T.getEnvironment() == Triple::GNUX32; + + // For ELF, x86-64 pointer size depends on the ABI. + // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64 + // with the x32 ABI, pointer size remains the default 4. + PointerSize = (is64Bit && !isX32) ? 8 : 4; + + // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI. + CalleeSaveStackSlotSize = is64Bit ? 
8 : 4; AssemblerDialect = AsmWriterFlavor; -- cgit v1.1 From 341c5fbe840cffedc4155a2cf130626d2bba11b5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 22 Jan 2013 18:05:59 +0000 Subject: X86: Make sure we account for the FMA4 register immediate value, otherwise rip-rel relocations will be off by one byte. PR15040. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFormats.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 6151d5c..44e574d 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -570,7 +570,7 @@ class FMA3 o, Format F, dag outs, dag ins, string asm, // FMA4 Instruction Templates class FMA4 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_DEFAULT> - : I, TA, + : Ii8, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template -- cgit v1.1 From bf6a77b98715012c0fa3bdbb3ba55fa7c24c1548 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 22 Jan 2013 20:05:56 +0000 Subject: [mips] Clean up code in MipsTargetLowering::LowerCall. No functional change intended git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 32 ++++++++++++-------------------- lib/Target/Mips/MipsISelLowering.h | 3 ++- 2 files changed, 14 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a309040..e148da1 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2810,7 +2810,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getPointerTy()); // With EABI it is possible to have 16 args on registers. - SmallVector, 16> RegsToPass; + std::deque< std::pair > RegsToPass; SmallVector MemOpChains; MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); @@ -2928,23 +2928,16 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GlobalOrExternal = true; } - SDValue InFlag; - - // T9 register operand. - SDValue T9; + SDValue JumpTarget = Callee; // T9 should contain the address of the callee function if // -relocation-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { - // copy to T9 unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); - InFlag = Chain.getValue(1); + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - if (Subtarget->inMips16Mode()) - T9 = DAG.getRegister(T9Reg, getPointerTy()); - else - Callee = DAG.getRegister(T9Reg, getPointerTy()); + if (!Subtarget->inMips16Mode()) + JumpTarget = SDValue(); } // Insert node "GP copy globalreg" before call to function. @@ -2962,6 +2955,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // chain and flag operands which copy the outgoing args into registers. // The InFlag is necessary since all emitted instructions must be // stuck together. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); @@ -2973,9 +2968,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // // Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SmallVector Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); + SmallVector Ops(1, Chain); + + if (JumpTarget.getNode()) + Ops.push_back(JumpTarget); // Add argument registers to the end of the list so that they are // known live into the call. @@ -2983,10 +2979,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // Add T9 register operand. - if (T9.getNode()) - Ops.push_back(T9); - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -3726,7 +3718,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector &OutChains, // Copy byVal arg to registers and stack. void MipsTargetLowering:: passByValArg(SDValue Chain, DebugLoc DL, - SmallVector, 16> &RegsToPass, + std::deque< std::pair > &RegsToPass, SmallVector &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index c4b38c6..deb6ad0 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -20,6 +20,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" +#include namespace llvm { namespace MipsISD { @@ -294,7 +295,7 @@ namespace llvm { /// passByValArg - Pass a byval argument in registers or on stack. void passByValArg(SDValue Chain, DebugLoc DL, - SmallVector, 16> &RegsToPass, + std::deque< std::pair > &RegsToPass, SmallVector &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, -- cgit v1.1 From 5de048ec30f9ef9f56c89f9fdb50022beca6ae88 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 22 Jan 2013 21:09:20 +0000 Subject: Have the integrated assembler give an error if $1 is used as an identifier in an expression. Currently this bug causes the line to be ignored in a release build and an assert in a debug build. rdar://13062484 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173195 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 7d4b4d8..b5f51d8 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -734,7 +734,9 @@ bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// primaryexpr ::= '.' /// primaryexpr ::= ~,+,- primaryexpr bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { - switch (Lexer.getKind()) { + SMLoc FirstTokenLoc = getLexer().getLoc(); + AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); + switch (FirstTokenKind) { default: return TokError("unknown token in expression"); // If we have an error assume that we've already handled it. 
@@ -750,8 +752,11 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { case AsmToken::String: case AsmToken::Identifier: { StringRef Identifier; - if (ParseIdentifier(Identifier)) + if (ParseIdentifier(Identifier)) { + if (FirstTokenKind == AsmToken::Dollar) + return Error(FirstTokenLoc, "invalid token in expression"); return true; + } EndLoc = SMLoc::getFromPointer(Identifier.end()); -- cgit v1.1 From defaca00b8087d452df2b783250a48a32658a910 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 22 Jan 2013 21:15:51 +0000 Subject: More encapsulation work. Use the AttributeSet when we're talking about more than one attribute. Add a function that adds a single attribute. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173196 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 45 +++++++++++++++------------------------ lib/IR/Core.cpp | 16 ++++++++------ lib/IR/Function.cpp | 4 +++- lib/IR/Instructions.cpp | 8 +++++-- lib/Transforms/IPO/PruneEH.cpp | 8 ++++--- lib/Transforms/Scalar/ObjCARC.cpp | 38 ++++++++++++++++----------------- 6 files changed, 60 insertions(+), 59 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 420b2e8..c67b1f3 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -150,7 +150,7 @@ uint64_t Attribute::encodeLLVMAttributesForBitcode(Attribute Attrs) { /// the LLVM attributes that have been decoded from the given integer. This /// function must stay in sync with 'encodeLLVMAttributesForBitcode'. Attribute Attribute::decodeLLVMAttributesForBitcode(LLVMContext &C, - uint64_t EncodedAttrs) { + uint64_t EncodedAttrs) { // The alignment is stored as a 16-bit raw value from bits 31--16. We shift // the bits above 31 down by 11 bits. unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; @@ -318,32 +318,29 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { I = Attribute::AttrKind(I + 1)) { if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { Attrs.insert(I); - + if (I == Attribute::Alignment) Alignment = 1ULL << ((A >> 16) - 1); else if (I == Attribute::StackAlignment) StackAlignment = 1ULL << ((A >> 26)-1); } } - + return *this; } -AttrBuilder &AttrBuilder::addAttributes(const Attribute &A) { - uint64_t Mask = A.Raw(); +AttrBuilder &AttrBuilder::addAttributes(const Attribute &Attr) { + uint64_t Mask = Attr.Raw(); for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (uint64_t A = (Mask & AttributeImpl::getAttrMask(I))) { + I = Attribute::AttrKind(I + 1)) + if ((Mask & AttributeImpl::getAttrMask(I)) != 0) Attrs.insert(I); - if (I == Attribute::Alignment) - Alignment = 1ULL << ((A >> 16) - 1); - else if (I == Attribute::StackAlignment) - StackAlignment = 1ULL << ((A >> 26)-1); - } - } - + if (Attr.getAlignment()) + Alignment = Attr.getAlignment(); + if (Attr.getStackAlignment()) + StackAlignment = Attr.getStackAlignment(); return *this; } @@ -601,18 +598,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { // AttributeWithIndexes that then are used to create the AttributeSet. 
if (!B.hasAttributes()) return AttributeSet(); - - uint64_t Mask = 0; - - for (AttrBuilder::iterator I = B.begin(), E = B.end(); I != E; ++I) - Mask |= AttributeImpl::getAttrMask(*I); - - Attribute A = Attribute::decodeLLVMAttributesForBitcode(C, Mask); - if (B.getAlignment()) - A.setAlignment(B.getAlignment()); - if (B.getStackAlignment()) - A.setStackAlignment(B.getStackAlignment()); - return get(C, AttributeWithIndex::get(Idx, A)); + return get(C, AttributeWithIndex::get(Idx, Attribute::get(C, B))); } //===----------------------------------------------------------------------===// @@ -665,8 +651,6 @@ uint64_t AttributeSet::Raw(unsigned Index) const { } /// getAttributes - The attributes for the specified index are returned. -/// Attributes for the result are denoted with Idx = 0. Function attributes are -/// denoted with Idx = ~0. Attribute AttributeSet::getAttributes(unsigned Idx) const { if (AttrList == 0) return Attribute(); @@ -691,6 +675,11 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { return false; } +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, + Attribute::AttrKind Attr) const { + return addAttr(C, Idx, Attribute::get(C, Attr)); +} + AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { return addAttr(C, Idx, Attrs.getAttributes(Idx)); diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 12cb971..e72eb69 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1383,8 +1383,9 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { const AttributeSet PAL = Func->getAttributes(); AttrBuilder B(PA); const AttributeSet PALnew = - PAL.addAttr(Func->getContext(), AttributeSet::FunctionIndex, - Attribute::get(Func->getContext(), B)); + PAL.addFnAttributes(Func->getContext(), + AttributeSet::get(Func->getContext(), + AttributeSet::FunctionIndex, B)); Func->setAttributes(PALnew); } @@ -1676,8 +1677,9 @@ void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B(PA); Call.setAttributes( - Call.getAttributes().addAttr(Call->getContext(), index, - Attribute::get(Call->getContext(), B))); + Call.getAttributes().addAttributes(Call->getContext(), index, + AttributeSet::get(Call->getContext(), + index, B))); } void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, @@ -1694,8 +1696,10 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B; B.addAlignmentAttr(align); - Call.setAttributes(Call.getAttributes().addAttr(Call->getContext(), index, - Attribute::get(Call->getContext(), B))); + Call.setAttributes(Call.getAttributes() + .addAttributes(Call->getContext(), index, + AttributeSet::get(Call->getContext(), + index, B))); } /*--.. 
Operations on call instructions (only) ..............................--*/ diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index cd35aff..6a5e616 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -250,7 +250,9 @@ void Function::dropAllReferences() { void Function::addAttribute(unsigned i, Attribute attr) { AttributeSet PAL = getAttributes(); - PAL = PAL.addAttr(getContext(), i, attr); + AttrBuilder B(attr); + PAL = PAL.addAttributes(getContext(), i, + AttributeSet::get(getContext(), i, B)); setAttributes(PAL); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index aba0fc9..8597d5c 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -333,7 +333,9 @@ CallInst::CallInst(const CallInst &CI) void CallInst::addAttribute(unsigned i, Attribute attr) { AttributeSet PAL = getAttributes(); - PAL = PAL.addAttr(getContext(), i, attr); + AttrBuilder B(attr); + PAL = PAL.addAttributes(getContext(), i, + AttributeSet::get(getContext(), i, B)); setAttributes(PAL); } @@ -589,7 +591,9 @@ bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { void InvokeInst::addAttribute(unsigned i, Attribute attr) { AttributeSet PAL = getAttributes(); - PAL = PAL.addAttr(getContext(), i, attr); + AttrBuilder B(attr); + PAL = PAL.addAttributes(getContext(), i, + AttributeSet::get(getContext(), i, B)); setAttributes(PAL); } diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index d872f0c..98c2602 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -146,9 +146,11 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { Function *F = (*I)->getFunction(); const AttributeSet &PAL = F->getAttributes(); - const AttributeSet &NPAL = PAL.addAttr(F->getContext(), ~0, - Attribute::get(F->getContext(), - NewAttributes)); + const AttributeSet &NPAL = + PAL.addFnAttributes(F->getContext(), + AttributeSet::get(F->getContext(), + AttributeSet::FunctionIndex, + NewAttributes)); if (PAL != NPAL) { MadeChange = true; F->setAttributes(NPAL); diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index a63e0e0..1c054f9 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1914,8 +1914,8 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); RetainRVCallee = M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, Attribute); @@ -1930,8 +1930,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); AutoreleaseRVCallee = M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, Attribute); @@ -1944,8 +1944,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, 
Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); ReleaseCallee = M->getOrInsertFunction( "objc_release", @@ -1960,8 +1960,8 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); RetainCallee = M->getOrInsertFunction( "objc_retain", @@ -1991,8 +1991,8 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { LLVMContext &C = M->getContext(); Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); AutoreleaseCallee = M->getOrInsertFunction( "objc_autorelease", @@ -4105,16 +4105,16 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { Type *I8XX = PointerType::getUnqual(I8X); Type *Params[] = { I8XX, I8X }; - AttributeSet Attribute = AttributeSet() - .addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)) - .addAttr(M->getContext(), 1, Attribute::get(C, Attribute::NoCapture)); + AttributeSet Attr = AttributeSet() + .addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind) + .addAttribute(M->getContext(), 1, Attribute::NoCapture); StoreStrongCallee = M->getOrInsertFunction( "objc_storeStrong", FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attribute); + Attr); } return StoreStrongCallee; } @@ -4126,8 +4126,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); RetainAutoreleaseCallee = M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); } @@ -4141,8 +4141,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { Type *Params[] = { I8X }; FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); RetainAutoreleaseRVCallee = M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, Attribute); -- cgit v1.1 From a88322c283a001019bd5cd4ddeafc425cc4d00af Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 22 Jan 2013 21:34:25 +0000 Subject: [mips] Implement MipsRegisterInfo::getRegPressureLimit. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173197 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterInfo.cpp | 22 ++++++++++++++++++++++ lib/Target/Mips/MipsRegisterInfo.h | 2 ++ 2 files changed, 24 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 70eb6f3..6486e98 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -47,6 +47,28 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } + +unsigned +MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + switch (RC->getID()) { + default: + return 0; + case Mips::CPURegsRegClassID: + case Mips::CPU64RegsRegClassID: + case Mips::DSPRegsRegClassID: { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + return 28 - TFI->hasFP(MF); + } + case Mips::FGR32RegClassID: + return 32; + case Mips::AFGR64RegClassID: + return 16; + case Mips::FGR64RegClassID: + return 32; + } +} + //===----------------------------------------------------------------------===// // Callee Saved Registers methods //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 78adf7f..032c2fd 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,6 +42,8 @@ public: void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; -- cgit v1.1 From 221514efe92676ce84a5e21bea91d8a6b21f9ed7 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 22 Jan 2013 21:44:53 +0000 Subject: Add a warning when there is a macro definition that has named parameters but the body does not use them and it appears the body has positional parameters. This can cause unexpected results as in the added test case. The darwin version of gas(1), which only supported positional parameters, happened to ignore the named parameters. Now that we want to support both styles of macros we issue a warning in this specific case. 
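To make the warning concrete, a hypothetical input of the kind it fires on (the test case actually added by this commit lives under test/ and is not shown in this 'lib'-limited view):

    .macro foo bar
        .long $0        # named parameter \bar is never used; $0 looks positional
    .endm

Since \bar is never referenced and $0 looks like a darwin-gas positional parameter that would now be passed through unchanged, the new diagnostic is emitted at the .macro directive.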
rdar://12861644 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173199 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 104 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index b5f51d8..4d6756e 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -237,6 +237,8 @@ private: void EatToEndOfLine(); bool ParseCppHashLineFilenameComment(const SMLoc &L); + void CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, + MCAsmMacroParameters Parameters); bool expandMacro(raw_svector_ostream &OS, StringRef Body, const MCAsmMacroParameters &Parameters, const MCAsmMacroArguments &A, @@ -3044,10 +3046,112 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { const char *BodyStart = StartToken.getLoc().getPointer(); const char *BodyEnd = EndToken.getLoc().getPointer(); StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); + CheckForBadMacro(DirectiveLoc, Name, Body, Parameters); DefineMacro(Name, MCAsmMacro(Name, Body, Parameters)); return false; } +/// CheckForBadMacro +/// +/// With the support added for named parameters there may be code out there that +/// is transitioning from positional parameters. In versions of gas that did +/// not support named parameters they would be ignored on the macro definition. +/// But to support both styles of parameters this is not possible, so if a macro +/// definition has named parameters but does not use them and has what appears +/// to be positional parameters, strings like $1, $2, ... and $n, then issue a +/// warning that the positional parameters found in the body will have no effect. +/// The hope is that the developer will either remove the named parameters from +/// the macro definition so the positional parameters get used, if that was what +/// was intended, or change the macro to use the named parameters. It is possible +/// this warning will trigger when none of the named parameters are used +/// and strings like $1 are in fact simply meant to be passed through unchanged. +void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, + StringRef Body, + MCAsmMacroParameters Parameters) { + // If this macro is not defined with named parameters the warning we are + // checking for here doesn't apply. + unsigned NParameters = Parameters.size(); + if (NParameters == 0) + return; + + bool NamedParametersFound = false; + bool PositionalParametersFound = false; + + // Look at the body of the macro for use of both the named parameters and what + // are likely to be positional parameters. This is what expandMacro() is + // doing when it finds the parameters in the body. + while (!Body.empty()) { + // Scan for the next possible parameter. + std::size_t End = Body.size(), Pos = 0; + for (; Pos != End; ++Pos) { + // Check for a substitution or escape. + // This macro is defined with parameters, look for \foo, \bar, etc. + if (Body[Pos] == '\\' && Pos + 1 != End) + break; + + // This macro should have parameters, but look for $0, $1, ..., $n too. + if (Body[Pos] != '$' || Pos + 1 == End) + continue; + char Next = Body[Pos + 1]; + if (Next == '$' || Next == 'n' || isdigit(Next)) + break; + } + + // Check if we reached the end. 
+ if (Pos == End) + break; + + if (Body[Pos] == '$') { + switch (Body[Pos+1]) { + // $$ => $ + case '$': + break; + + // $n => number of arguments + case 'n': + PositionalParametersFound = true; + break; + + // $[0-9] => argument + default: { + PositionalParametersFound = true; + break; + } + } + Pos += 2; + } else { + unsigned I = Pos + 1; + while (isIdentifierChar(Body[I]) && I + 1 != End) + ++I; + + const char *Begin = Body.data() + Pos +1; + StringRef Argument(Begin, I - (Pos +1)); + unsigned Index = 0; + for (; Index < NParameters; ++Index) + if (Parameters[Index].first == Argument) + break; + + if (Index == NParameters) { + if (Body[Pos+1] == '(' && Body[Pos+2] == ')') + Pos += 3; + else { + Pos = I; + } + } else { + NamedParametersFound = true; + Pos += 1 + Argument.size(); + } + } + // Update the scan point. + Body = Body.substr(Pos); + } + + if (!NamedParametersFound && PositionalParametersFound) + Warning(DirectiveLoc, "macro defined with named parameters which are not " + "used in macro body, possible positional parameter " + "found in body which will have no effect"); +} + /// ParseDirectiveEndMacro /// ::= .endm /// ::= .endmacro -- cgit v1.1 From 13d08bf4159d8ffb780bb2e29a03763c17543001 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 22 Jan 2013 21:47:38 +0000 Subject: Fix an issue with pseudo atomic instruction DAG scheduling - Add list of physical registers clobbered in pseudo atomic insts Physical registers are clobbered when pseudo atomic instructions are expanded. Add them to the clobber list to prevent the DAG scheduler from mis-scheduling them after these instructions are declared side-effect free. - Add test case from Michael Kuperstein git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173200 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86InstrCompiler.td | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b740897..0475814 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12840,7 +12840,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && @@ -13072,7 +13072,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 2a26a22..f387962 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -513,15 +513,19 @@ def CMOV_RFP80 : I<0, Pseudo, multiclass PSEUDO_ATOMIC_LOAD_BINOP { let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { + let Defs = [EFLAGS, AL] in def NAME#8 : I<0, Pseudo, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), !strconcat(mnemonic, "8 PSEUDO!"), []>; + let Defs = [EFLAGS, AX] in def NAME#16 : I<0, Pseudo,(outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), !strconcat(mnemonic, "16 PSEUDO!"), []>; + let Defs = [EFLAGS, EAX] in def NAME#32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), !strconcat(mnemonic, "32 PSEUDO!"), []>; + let Defs = [EFLAGS, 
RAX] in def NAME#64 : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), !strconcat(mnemonic, "64 PSEUDO!"), []>; @@ -559,7 +563,8 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; multiclass PSEUDO_ATOMIC_LOAD_BINOP6432 { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in + let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], + mayLoad = 1, mayStore = 1, hasSideEffects = 0 in def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), (ins i64mem:$ptr, GR32:$val1, GR32:$val2), !strconcat(mnemonic, "6432 PSEUDO!"), []>; -- cgit v1.1 From 862d51fc671af273e52593246e7607fe9a6ba80c Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 22 Jan 2013 21:49:00 +0000 Subject: [ObjCARC] Refactored out the innermost two loops from PerformCodePlacement into the method ConnectTDBUTraversals. The method PerformCodePlacement was doing too much (i.e., three loops, lots of different checking). This refactoring separates the analysis section of the method into a separate function while leaving the actual code placement and analysis preparation in PerformCodePlacement. *NOTE* Really this part of ObjCARC should be refactored out of the main pass class into its own separate class/struct. But it is not time to make that change yet (we don't want to make such an invasive change without fixing all of the bugs first). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 351 +++++++++++++++++++++----------------- 1 file changed, 198 insertions(+), 153 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 1c054f9..32e4b9c 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1849,6 +1849,19 @@ namespace { SmallVectorImpl &DeadInsts, Module *M); + bool ConnectTDBUTraversals(DenseMap &BBStates, + MapVector &Retains, + DenseMap &Releases, + Module *M, + SmallVector &NewRetains, + SmallVector &NewReleases, + SmallVector &DeadInsts, + RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, + Value *Arg, + bool KnownSafe, + bool &AnyPairsCompletelyEliminated); + bool PerformCodePlacement(DenseMap &BBStates, MapVector &Retains, DenseMap &Releases, Module *M); @@ -3398,6 +3411,179 @@ void ObjCARCOpt::MoveCalls(Value *Arg, } } +bool +ObjCARCOpt::ConnectTDBUTraversals(DenseMap + &BBStates, + MapVector &Retains, + DenseMap &Releases, + Module *M, + SmallVector &NewRetains, + SmallVector &NewReleases, + SmallVector &DeadInsts, + RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, + Value *Arg, + bool KnownSafe, + bool &AnyPairsCompletelyEliminated) { + // If a pair happens in a region where it is known that the reference count + // is already incremented, we can similarly ignore possible decrements. + bool KnownSafeTD = true, KnownSafeBU = true; + + // Connect the dots between the top-down-collected RetainsToMove and + // bottom-up-collected ReleasesToMove to form sets of related calls. + // This is an iterative process so that we connect multiple releases + // to multiple retains if needed. 
+ unsigned OldDelta = 0; + unsigned NewDelta = 0; + unsigned OldCount = 0; + unsigned NewCount = 0; + bool FirstRelease = true; + bool FirstRetain = true; + for (;;) { + for (SmallVectorImpl::const_iterator + NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { + Instruction *NewRetain = *NI; + MapVector::const_iterator It = Retains.find(NewRetain); + assert(It != Retains.end()); + const RRInfo &NewRetainRRI = It->second; + KnownSafeTD &= NewRetainRRI.KnownSafe; + for (SmallPtrSet::const_iterator + LI = NewRetainRRI.Calls.begin(), + LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { + Instruction *NewRetainRelease = *LI; + DenseMap::const_iterator Jt = + Releases.find(NewRetainRelease); + if (Jt == Releases.end()) + return false; + const RRInfo &NewRetainReleaseRRI = Jt->second; + assert(NewRetainReleaseRRI.Calls.count(NewRetain)); + if (ReleasesToMove.Calls.insert(NewRetainRelease)) { + OldDelta -= + BBStates[NewRetainRelease->getParent()].GetAllPathCount(); + + // Merge the ReleaseMetadata and IsTailCallRelease values. + if (FirstRelease) { + ReleasesToMove.ReleaseMetadata = + NewRetainReleaseRRI.ReleaseMetadata; + ReleasesToMove.IsTailCallRelease = + NewRetainReleaseRRI.IsTailCallRelease; + FirstRelease = false; + } else { + if (ReleasesToMove.ReleaseMetadata != + NewRetainReleaseRRI.ReleaseMetadata) + ReleasesToMove.ReleaseMetadata = 0; + if (ReleasesToMove.IsTailCallRelease != + NewRetainReleaseRRI.IsTailCallRelease) + ReleasesToMove.IsTailCallRelease = false; + } + + // Collect the optimal insertion points. + if (!KnownSafe) + for (SmallPtrSet::const_iterator + RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), + RE = NewRetainReleaseRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (ReleasesToMove.ReverseInsertPts.insert(RIP)) + NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); + } + NewReleases.push_back(NewRetainRelease); + } + } + } + NewRetains.clear(); + if (NewReleases.empty()) break; + + // Back the other way. + for (SmallVectorImpl::const_iterator + NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { + Instruction *NewRelease = *NI; + DenseMap::const_iterator It = + Releases.find(NewRelease); + assert(It != Releases.end()); + const RRInfo &NewReleaseRRI = It->second; + KnownSafeBU &= NewReleaseRRI.KnownSafe; + for (SmallPtrSet::const_iterator + LI = NewReleaseRRI.Calls.begin(), + LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { + Instruction *NewReleaseRetain = *LI; + MapVector::const_iterator Jt = + Retains.find(NewReleaseRetain); + if (Jt == Retains.end()) + return false; + const RRInfo &NewReleaseRetainRRI = Jt->second; + assert(NewReleaseRetainRRI.Calls.count(NewRelease)); + if (RetainsToMove.Calls.insert(NewReleaseRetain)) { + unsigned PathCount = + BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); + OldDelta += PathCount; + OldCount += PathCount; + + // Merge the IsRetainBlock values. + if (FirstRetain) { + RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; + FirstRetain = false; + } else if (ReleasesToMove.IsRetainBlock != + NewReleaseRetainRRI.IsRetainBlock) + // It's not possible to merge the sequences if one uses + // objc_retain and the other uses objc_retainBlock. + return false; + + // Collect the optimal insertion points. 
+ if (!KnownSafe) + for (SmallPtrSet::const_iterator + RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), + RE = NewReleaseRetainRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (RetainsToMove.ReverseInsertPts.insert(RIP)) { + PathCount = BBStates[RIP->getParent()].GetAllPathCount(); + NewDelta += PathCount; + NewCount += PathCount; + } + } + NewRetains.push_back(NewReleaseRetain); + } + } + } + NewReleases.clear(); + if (NewRetains.empty()) break; + } + + // If the pointer is known incremented or nested, we can safely delete the + // pair regardless of what's between them. + if (KnownSafeTD || KnownSafeBU) { + RetainsToMove.ReverseInsertPts.clear(); + ReleasesToMove.ReverseInsertPts.clear(); + NewCount = 0; + } else { + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + return false; + } + + // Determine whether the original call points are balanced in the retain and + // release calls through the program. If not, conservatively don't touch + // them. + // TODO: It's theoretically possible to do code motion in this case, as + // long as the existing imbalances are maintained. + if (OldDelta != 0) + return false; + + Changed = true; + assert(OldCount != 0 && "Unreachable code?"); + NumRRs += OldCount - NewCount; + + // Set to true if we completely removed any RR pairs. + AnyPairsCompletelyEliminated |= NewCount == 0; + + // We can move calls! + return true; +} + /// Identify pairings between the retains and releases, and delete and/or move /// them. bool @@ -3440,164 +3626,23 @@ ObjCARCOpt::PerformCodePlacement(DenseMap if (GV->isConstant()) KnownSafe = true; - // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. - bool KnownSafeTD = true, KnownSafeBU = true; - // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. - // This is an iterative process so that we connect multiple releases - // to multiple retains if needed. - unsigned OldDelta = 0; - unsigned NewDelta = 0; - unsigned OldCount = 0; - unsigned NewCount = 0; - bool FirstRelease = true; - bool FirstRetain = true; NewRetains.push_back(Retain); - for (;;) { - for (SmallVectorImpl::const_iterator - NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { - Instruction *NewRetain = *NI; - MapVector::const_iterator It = Retains.find(NewRetain); - assert(It != Retains.end()); - const RRInfo &NewRetainRRI = It->second; - KnownSafeTD &= NewRetainRRI.KnownSafe; - for (SmallPtrSet::const_iterator - LI = NewRetainRRI.Calls.begin(), - LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewRetainRelease = *LI; - DenseMap::const_iterator Jt = - Releases.find(NewRetainRelease); - if (Jt == Releases.end()) - goto next_retain; - const RRInfo &NewRetainReleaseRRI = Jt->second; - assert(NewRetainReleaseRRI.Calls.count(NewRetain)); - if (ReleasesToMove.Calls.insert(NewRetainRelease)) { - OldDelta -= - BBStates[NewRetainRelease->getParent()].GetAllPathCount(); - - // Merge the ReleaseMetadata and IsTailCallRelease values. 
- if (FirstRelease) { - ReleasesToMove.ReleaseMetadata = - NewRetainReleaseRRI.ReleaseMetadata; - ReleasesToMove.IsTailCallRelease = - NewRetainReleaseRRI.IsTailCallRelease; - FirstRelease = false; - } else { - if (ReleasesToMove.ReleaseMetadata != - NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; - if (ReleasesToMove.IsTailCallRelease != - NewRetainReleaseRRI.IsTailCallRelease) - ReleasesToMove.IsTailCallRelease = false; - } - - // Collect the optimal insertion points. - if (!KnownSafe) - for (SmallPtrSet::const_iterator - RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), - RE = NewRetainReleaseRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (ReleasesToMove.ReverseInsertPts.insert(RIP)) - NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); - } - NewReleases.push_back(NewRetainRelease); - } - } - } - NewRetains.clear(); - if (NewReleases.empty()) break; - - // Back the other way. - for (SmallVectorImpl::const_iterator - NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { - Instruction *NewRelease = *NI; - DenseMap::const_iterator It = - Releases.find(NewRelease); - assert(It != Releases.end()); - const RRInfo &NewReleaseRRI = It->second; - KnownSafeBU &= NewReleaseRRI.KnownSafe; - for (SmallPtrSet::const_iterator - LI = NewReleaseRRI.Calls.begin(), - LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewReleaseRetain = *LI; - MapVector::const_iterator Jt = - Retains.find(NewReleaseRetain); - if (Jt == Retains.end()) - goto next_retain; - const RRInfo &NewReleaseRetainRRI = Jt->second; - assert(NewReleaseRetainRRI.Calls.count(NewRelease)); - if (RetainsToMove.Calls.insert(NewReleaseRetain)) { - unsigned PathCount = - BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); - OldDelta += PathCount; - OldCount += PathCount; - - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - goto next_retain; - - // Collect the optimal insertion points. - if (!KnownSafe) - for (SmallPtrSet::const_iterator - RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), - RE = NewReleaseRetainRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (RetainsToMove.ReverseInsertPts.insert(RIP)) { - PathCount = BBStates[RIP->getParent()].GetAllPathCount(); - NewDelta += PathCount; - NewCount += PathCount; - } - } - NewRetains.push_back(NewReleaseRetain); - } - } - } - NewReleases.clear(); - if (NewRetains.empty()) break; - } - - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { - RetainsToMove.ReverseInsertPts.clear(); - ReleasesToMove.ReverseInsertPts.clear(); - NewCount = 0; - } else { - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. 
- if (NewDelta != 0) - goto next_retain; + bool PerformMoveCalls = + ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains, + NewReleases, DeadInsts, RetainsToMove, + ReleasesToMove, Arg, KnownSafe, + AnyPairsCompletelyEliminated); + + if (PerformMoveCalls) { + // Ok, everything checks out and we're all set. Let's move/delete some + // code! + MoveCalls(Arg, RetainsToMove, ReleasesToMove, + Retains, Releases, DeadInsts, M); } - // Determine whether the original call points are balanced in the retain and - // release calls through the program. If not, conservatively don't touch - // them. - // TODO: It's theoretically possible to do code motion in this case, as - // long as the existing imbalances are maintained. - if (OldDelta != 0) - goto next_retain; - - // Ok, everything checks out and we're all set. Let's move some code! - Changed = true; - assert(OldCount != 0 && "Unreachable code?"); - AnyPairsCompletelyEliminated = NewCount == 0; - NumRRs += OldCount - NewCount; - MoveCalls(Arg, RetainsToMove, ReleasesToMove, - Retains, Releases, DeadInsts, M); - - next_retain: + // Clean up state for next retain. NewReleases.clear(); NewRetains.clear(); RetainsToMove.clear(); -- cgit v1.1 From e65d46246c3e8fc279a9b6b7aaf985060ba88ab0 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 22 Jan 2013 21:53:43 +0000 Subject: Fixed typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173202 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 32e4b9c..45bc7ab 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -3576,9 +3576,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap Changed = true; assert(OldCount != 0 && "Unreachable code?"); NumRRs += OldCount - NewCount; - // Set to true if we completely removed any RR pairs. - AnyPairsCompletelyEliminated |= NewCount == 0; + AnyPairsCompletelyEliminated = NewCount == 0; // We can move calls! return true; -- cgit v1.1 From 0ec35ac4fcd5c83e2ec35d04fc20db9eb387d289 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Tue, 22 Jan 2013 22:55:04 +0000 Subject: Add instruction encodings / disassembly support for u10 / lu10 instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173204 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrFormats.td | 14 ++++++++++++-- lib/Target/XCore/XCoreInstrInfo.td | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 0e0aab9..29bc658 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -125,12 +125,22 @@ class _FLU6 opc, dag outs, dag ins, string asmstr, list pattern> let Inst{9-0} = a{15-6}; } -class _FU10 pattern> +class _FU10 opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + bits<10> a; + + let Inst{15-10} = opc; + let Inst{9-0} = a; } -class _FLU10 pattern> +class _FLU10 opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<20> a; + + let Inst{31-26} = opc; + let Inst{25-16} = a{9-0}; + let Inst{15-10} = 0b111100; + let Inst{9-0} = a{19-10}; } class _F2R opc, dag outs, dag ins, string asmstr, list pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 3984140..d193b45 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -649,24 +649,24 @@ defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">; // TODO ldwcpl, blacp let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in -def LDAPF_u10 : _FU10<(outs), (ins i32imm:$addr), "ldap r11, $addr", []>; +def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAPF_lu10 : _FLU10<(outs), (ins i32imm:$addr), "ldap r11, $addr", - [(set R11, (pcrelwrapper tglobaladdr:$addr))]>; +def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", + [(set R11, (pcrelwrapper tglobaladdr:$a))]>; -let Defs = [R11], isReMaterializable = 1 in -def LDAPF_lu10_ba : _FLU10<(outs), (ins i32imm:$addr), "ldap r11, $addr", - [(set R11, (pcrelwrapper tblockaddress:$addr))]>; +let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in +def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", + [(set R11, (pcrelwrapper tblockaddress:$a))]>; let isCall=1, // All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { -def BLRF_u10 : _FU10<(outs), (ins calltarget:$target), "bl $target", - [(XCoreBranchLink immU10:$target)]>; +def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a", + [(XCoreBranchLink immU10:$a)]>; -def BLRF_lu10 : _FLU10<(outs), (ins calltarget:$target), "bl $target", - [(XCoreBranchLink immU20:$target)]>; +def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a", + [(XCoreBranchLink immU20:$a)]>; } // Two operand short -- cgit v1.1 From 70d2ca0725b05a2d372e4dc3336e8ea350093e98 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 00:20:53 +0000 Subject: Use the AttributeSet when adding multiple attributes and an Attribute::AttrKind when adding a single attribute to the function. 
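The resulting call-site idioms, sketched for some llvm::Function *F (illustrative, not code from this patch):

    // Single attribute: pass the enum kind directly.
    F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);

    // Several attributes: build them up, wrap them in an AttributeSet, add once.
    AttrBuilder B;
    B.addAttribute(Attribute::NoUnwind).addAttribute(Attribute::NoInline);
    F->addAttributes(AttributeSet::FunctionIndex,
                     AttributeSet::get(F->getContext(),
                                       AttributeSet::FunctionIndex, B));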
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173210 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Function.cpp | 21 ++++++++++++++------- lib/Transforms/IPO/FunctionAttrs.cpp | 4 +--- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 6a5e616..4d047f6 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -125,7 +125,10 @@ bool Argument::hasStructRetAttr() const { /// addAttr - Add a Attribute to an argument void Argument::addAttr(Attribute attr) { - getParent()->addAttribute(getArgNo() + 1, attr); + AttrBuilder B(attr); + getParent()->addAttributes(getArgNo() + 1, + AttributeSet::get(getParent()->getContext(), + getArgNo() + 1, B)); } /// removeAttr - Remove a Attribute from an argument @@ -248,17 +251,21 @@ void Function::dropAllReferences() { BasicBlocks.begin()->eraseFromParent(); } -void Function::addAttribute(unsigned i, Attribute attr) { +void Function::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); - AttrBuilder B(attr); - PAL = PAL.addAttributes(getContext(), i, - AttributeSet::get(getContext(), i, B)); + PAL = PAL.addAttribute(getContext(), i, attr); + setAttributes(PAL); +} + +void Function::addAttributes(unsigned i, AttributeSet attrs) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addAttributes(getContext(), i, attrs); setAttributes(PAL); } -void Function::removeAttribute(unsigned i, Attribute attr) { +void Function::removeAttribute(unsigned i, Attribute attrs) { AttributeSet PAL = getAttributes(); - PAL = PAL.removeAttr(getContext(), i, attr); + PAL = PAL.removeAttr(getContext(), i, attrs); setAttributes(PAL); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index e9bc4ad..c267097 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -219,10 +219,8 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { Attribute::get(F->getContext(), B)); // Add in the new attribute. - B.clear(); - B.addAttribute(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); F->addAttribute(AttributeSet::FunctionIndex, - Attribute::get(F->getContext(), B)); + ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); if (ReadsMemory) ++NumReadOnly; -- cgit v1.1 From 8246df61f6de716acf1f8c64fac3c19970a2c174 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 00:45:55 +0000 Subject: Use the AttributeSet when removing multiple attributes. Use Attribute::AttrKind when removing one attribute. This further encapsulates the use of the attributes. 
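Mirroring the addition side from the previous commit, the removal idioms now look roughly like this (illustrative; C is an LLVMContext and Attrs an AttributeSet):

    // Remove a single attribute by kind.
    Attrs = Attrs.removeAttribute(C, AttributeSet::FunctionIndex,
                                  Attribute::Nest);

    // Remove a group of attributes in one call.
    AttrBuilder B;
    B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
    Attrs = Attrs.removeAttributes(C, AttributeSet::FunctionIndex,
                                   AttributeSet::get(C, AttributeSet::FunctionIndex,
                                                     B));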
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173214 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 10 ++++++++++ lib/IR/Core.cpp | 12 +++++++----- lib/IR/Function.cpp | 9 ++++++--- lib/IR/Instructions.cpp | 14 ++++++++++---- lib/Transforms/IPO/FunctionAttrs.cpp | 5 +++-- lib/Transforms/IPO/GlobalOpt.cpp | 3 +-- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 11 +++++++---- 7 files changed, 44 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index c67b1f3..a3abd36 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -730,6 +730,16 @@ AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, return get(C, NewAttrList); } +AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx, + Attribute::AttrKind Attr) const { + return removeAttr(C, Idx, Attribute::get(C, Attr)); +} + +AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, + AttributeSet Attrs) const { + return removeAttr(C, Idx, Attrs.getAttributes(Idx)); +} + AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, Attribute Attrs) const { #ifndef NDEBUG diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index e72eb69..0e42536 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1394,8 +1394,9 @@ void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { const AttributeSet PAL = Func->getAttributes(); AttrBuilder B(PA); const AttributeSet PALnew = - PAL.removeAttr(Func->getContext(), AttributeSet::FunctionIndex, - Attribute::get(Func->getContext(), B)); + PAL.removeAttributes(Func->getContext(), AttributeSet::FunctionIndex, + AttributeSet::get(Func->getContext(), + AttributeSet::FunctionIndex, B)); Func->setAttributes(PALnew); } @@ -1686,9 +1687,10 @@ void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute PA) { CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B(PA); - Call.setAttributes( - Call.getAttributes().removeAttr(Call->getContext(), index, - Attribute::get(Call->getContext(), B))); + Call.setAttributes(Call.getAttributes() + .removeAttributes(Call->getContext(), index, + AttributeSet::get(Call->getContext(), + index, B))); } void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 4d047f6..f2f3ec9 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -133,7 +133,10 @@ void Argument::addAttr(Attribute attr) { /// removeAttr - Remove a Attribute from an argument void Argument::removeAttr(Attribute attr) { - getParent()->removeAttribute(getArgNo() + 1, attr); + AttrBuilder B(attr); + getParent()->removeAttributes(getArgNo() + 1, + AttributeSet::get(getParent()->getContext(), + getArgNo() + 1, B)); } @@ -263,9 +266,9 @@ void Function::addAttributes(unsigned i, AttributeSet attrs) { setAttributes(PAL); } -void Function::removeAttribute(unsigned i, Attribute attrs) { +void Function::removeAttributes(unsigned i, AttributeSet attrs) { AttributeSet PAL = getAttributes(); - PAL = PAL.removeAttr(getContext(), i, attrs); + PAL = PAL.removeAttributes(getContext(), i, attrs); setAttributes(PAL); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 8597d5c..8a0a465 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -334,14 +334,18 @@ CallInst::CallInst(const CallInst &CI) void CallInst::addAttribute(unsigned i, Attribute attr) { AttributeSet PAL = getAttributes(); AttrBuilder B(attr); - PAL = PAL.addAttributes(getContext(), i, - 
AttributeSet::get(getContext(), i, B)); + LLVMContext &Context = getContext(); + PAL = PAL.addAttributes(Context, i, + AttributeSet::get(Context, i, B)); setAttributes(PAL); } void CallInst::removeAttribute(unsigned i, Attribute attr) { AttributeSet PAL = getAttributes(); - PAL = PAL.removeAttr(getContext(), i, attr); + AttrBuilder B(attr); + LLVMContext &Context = getContext(); + PAL = PAL.removeAttributes(Context, i, + AttributeSet::get(Context, i, B)); setAttributes(PAL); } @@ -599,7 +603,9 @@ void InvokeInst::addAttribute(unsigned i, Attribute attr) { void InvokeInst::removeAttribute(unsigned i, Attribute attr) { AttributeSet PAL = getAttributes(); - PAL = PAL.removeAttr(getContext(), i, attr); + AttrBuilder B(attr); + PAL = PAL.removeAttributes(getContext(), i, + AttributeSet::get(getContext(), i, B)); setAttributes(PAL); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index c267097..7e46dcb 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -215,8 +215,9 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F->removeAttribute(AttributeSet::FunctionIndex, - Attribute::get(F->getContext(), B)); + F->removeAttributes(AttributeSet::FunctionIndex, + AttributeSet::get(F->getContext(), + AttributeSet::FunctionIndex, B)); // Add in the new attribute. F->addAttribute(AttributeSet::FunctionIndex, diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index efec788..6fe4316 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2072,8 +2072,7 @@ static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { continue; // There can be only one. - return Attrs.removeAttr(C, Attrs.getSlot(i).Index, - Attribute::get(C, Attribute::Nest)); + return Attrs.removeAttribute(C, Attrs.getSlot(i).Index, Attribute::Nest); } return Attrs; diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 20b6de2..40f0ebb 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1461,8 +1461,10 @@ struct MemorySanitizerVisitor : public InstVisitor { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - Func->removeAttribute(AttributeSet::FunctionIndex, - Attribute::get(Func->getContext(), B)); + Func->removeAttributes(AttributeSet::FunctionIndex, + AttributeSet::get(Func->getContext(), + AttributeSet::FunctionIndex, + B)); } } IRBuilder<> IRB(&I); @@ -1853,8 +1855,9 @@ bool MemorySanitizer::runOnFunction(Function &F) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F.removeAttribute(AttributeSet::FunctionIndex, - Attribute::get(F.getContext(), B)); + F.removeAttributes(AttributeSet::FunctionIndex, + AttributeSet::get(F.getContext(), + AttributeSet::FunctionIndex, B)); return Visitor.runOnFunction(); } -- cgit v1.1 From f148c66ce4c22130ff1ae242582e024ea18492bb Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 23 Jan 2013 01:35:00 +0000 Subject: Add support for reverse pointer induction variables. These are loops that contain pointers that count backwards. For example, this is the hot loop in BZIP: do { m = *--p; *p = ( ... 
); } while (--n); git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 89 +++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 447f24a..0996b7b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -327,7 +327,8 @@ public: IK_NoInduction, ///< Not an induction variable. IK_IntInduction, ///< Integer induction variable. Step = 1. IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1. - IK_PtrInduction ///< Pointer induction variable. Step = sizeof(elem). + IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem). + IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). }; /// This POD struct holds information about reduction variables. @@ -734,6 +735,9 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx, int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); + // Make sure that the pointer does not point to structs. + if (cast(Ptr->getType())->getElementType()->isAggregateType()) + return 0; // If this value is a pointer induction variable we know it is consecutive. PHINode *Phi = dyn_cast_or_null(Ptr); @@ -741,6 +745,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { InductionInfo II = Inductions[Phi]; if (IK_PtrInduction == II.IK) return 1; + else if (IK_ReversePtrInduction == II.IK) + return -1; } GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); @@ -750,6 +756,29 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { unsigned NumOperands = Gep->getNumOperands(); Value *LastIndex = Gep->getOperand(NumOperands - 1); + Value *GpPtr = Gep->getPointerOperand(); + // If this GEP value is a consecutive pointer induction variable and all of + // the indices are constant then we know it is consecutive. We can + Phi = dyn_cast(GpPtr); + if (Phi && Inductions.count(Phi)) { + + // Make sure that the pointer does not point to structs. + PointerType *GepPtrType = cast(GpPtr->getType()); + if (GepPtrType->getElementType()->isAggregateType()) + return 0; + + // Make sure that all of the index operands are loop invariant. + for (unsigned i = 1; i < NumOperands; ++i) + if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; + + InductionInfo II = Inductions[Phi]; + if (IK_PtrInduction == II.IK) + return 1; + else if (IK_ReversePtrInduction == II.IK) + return -1; + } + // Check that all of the gep indices are uniform except for the last. for (unsigned i = 0; i < NumOperands - 1; ++i) if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) @@ -1148,6 +1177,18 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { LoopBypassBlocks.back()->getTerminator()); break; } + case LoopVectorizationLegality::IK_ReversePtrInduction: { + // The value at the end of the loop for the reverse pointer is calculated + // by creating a GEP with a negative index starting from the start value. 
+ Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0); + Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown, + "rev.ind.end", + LoopBypassBlocks.back()->getTerminator()); + EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx, + "rev.ptr.ind.end", + LoopBypassBlocks.back()->getTerminator()); + break; + } }// end of case // The new PHI merges the original incoming value, in case of a bypass, @@ -1625,6 +1666,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case LoopVectorizationLegality::IK_ReverseIntInduction: case LoopVectorizationLegality::IK_PtrInduction: + case LoopVectorizationLegality::IK_ReversePtrInduction: // Handle reverse integer and pointer inductions. Value *StartIdx = 0; // If we have a single integer induction variable then use it. @@ -1660,15 +1702,23 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); + // Is this a reverse induction ptr or a consecutive induction ptr. + bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == + II.IK); + // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); for (unsigned int i = 0; i < VF; ++i) { - Constant *Idx = ConstantInt::get(Induction->getType(), - i + part * VF); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, - "gep.idx"); + int EltIndex = (i + part * VF) * (Reverse ? -1 : 1); + Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); + Value *GlobalIdx; + if (!Reverse) + GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); + else + GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); + Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, "next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, @@ -1786,7 +1836,19 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // Handle consecutive stores. GetElementPtrInst *Gep = dyn_cast(Ptr); - if (Gep) { + if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + Value *PtrOperand = Gep->getPointerOperand(); + Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; + FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(0, FirstBasePtr); + Ptr = Builder.Insert(Gep2); + } else if (Gep) { + assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), + OrigLoop) && "Base ptr must be invariant"); + // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); @@ -1844,7 +1906,18 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } GetElementPtrInst *Gep = dyn_cast(Ptr); - if (Gep) { + if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + Value *PtrOperand = Gep->getPointerOperand(); + Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; + FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); + // Create the new GEP with the new induction variable. 
+ GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(0, FirstBasePtr); + Ptr = Builder.Insert(Gep2); + } else if (Gep) { + assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), + OrigLoop) && "Base ptr must be invariant"); + // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); @@ -2589,6 +2662,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType()); if (C->getValue()->equalsInt(Size)) return IK_PtrInduction; + else if (C->getValue()->equalsInt(0 - Size)) + return IK_ReversePtrInduction; return IK_NoInduction; } -- cgit v1.1 From 80537b995d661cbbf478b34a228921b101a14b42 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 23 Jan 2013 02:09:01 +0000 Subject: R600: Factorise VTX_WORD0 and VTX_WORD1 in tblgen def Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173220 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 110 +++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b15ea76..8a0c52d 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -173,6 +173,55 @@ class R600ALU_Word1_OP3 alu_inst> : R600ALU_Word1{ let Word1{17-13} = alu_inst; } +class VTX_WORD0 { + field bits<32> Word0; + bits<7> SRC_GPR; + bits<5> VC_INST; + bits<2> FETCH_TYPE; + bits<1> FETCH_WHOLE_QUAD; + bits<8> BUFFER_ID; + bits<1> SRC_REL; + bits<2> SRC_SEL_X; + bits<6> MEGA_FETCH_COUNT; + + let Word0{4-0} = VC_INST; + let Word0{6-5} = FETCH_TYPE; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = BUFFER_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{25-24} = SRC_SEL_X; + let Word0{31-26} = MEGA_FETCH_COUNT; +} + +class VTX_WORD1_GPR { + field bits<32> Word1; + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<1> USE_CONST_FIELDS; + bits<6> DATA_FORMAT; + bits<2> NUM_FORMAT_ALL; + bits<1> FORMAT_COMP_ALL; + bits<1> SRF_MODE_ALL; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{8} = 0; // Reserved + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{21} = USE_CONST_FIELDS; + let Word1{27-22} = DATA_FORMAT; + let Word1{29-28} = NUM_FORMAT_ALL; + let Word1{30} = FORMAT_COMP_ALL; + let Word1{31} = SRF_MODE_ALL; +} + /* XXX: R600 subtarget uses a slightly different encoding than the other subtargets. We currently handle this in R600MCCodeEmitter, but we may @@ -1235,37 +1284,30 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < >; class VTX_READ_eg buffer_id, dag outs, list pattern> - : InstR600ISA { - - // Operands - bits<7> DST_GPR; - bits<7> SRC_GPR; + : InstR600ISA , + VTX_WORD1_GPR, VTX_WORD0 { // Static fields - bits<5> VC_INST = 0; - bits<2> FETCH_TYPE = 2; - bits<1> FETCH_WHOLE_QUAD = 0; - bits<8> BUFFER_ID = buffer_id; - bits<1> SRC_REL = 0; + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; // XXX: We can infer this field based on the SRC_GPR. This would allow us // to store vertex addresses in any channel, not just X. 
- bits<2> SRC_SEL_X = 0; - bits<6> MEGA_FETCH_COUNT; - bits<1> DST_REL = 0; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; + let SRC_SEL_X = 0; + let DST_REL = 0; // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, // however, based on my testing if USE_CONST_FIELDS is set, then all // these fields need to be set to 0. - bits<1> USE_CONST_FIELDS = 0; - bits<6> DATA_FORMAT; - bits<2> NUM_FORMAT_ALL = 1; - bits<1> FORMAT_COMP_ALL = 0; - bits<1> SRF_MODE_ALL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; // LLVM can only encode 64-bit instructions, so these fields are manually // encoded in R600CodeEmitter // @@ -1276,29 +1318,7 @@ class VTX_READ_eg buffer_id, dag outs, list pattern> // bits<1> ALT_CONST = 0; // bits<2> BUFFER_INDEX_MODE = 0; - // VTX_WORD0 - let Inst{4-0} = VC_INST; - let Inst{6-5} = FETCH_TYPE; - let Inst{7} = FETCH_WHOLE_QUAD; - let Inst{15-8} = BUFFER_ID; - let Inst{22-16} = SRC_GPR; - let Inst{23} = SRC_REL; - let Inst{25-24} = SRC_SEL_X; - let Inst{31-26} = MEGA_FETCH_COUNT; - - // VTX_WORD1_GPR - let Inst{38-32} = DST_GPR; - let Inst{39} = DST_REL; - let Inst{40} = 0; // Reserved - let Inst{43-41} = DST_SEL_X; - let Inst{46-44} = DST_SEL_Y; - let Inst{49-47} = DST_SEL_Z; - let Inst{52-50} = DST_SEL_W; - let Inst{53} = USE_CONST_FIELDS; - let Inst{59-54} = DATA_FORMAT; - let Inst{61-60} = NUM_FORMAT_ALL; - let Inst{62} = FORMAT_COMP_ALL; - let Inst{63} = SRF_MODE_ALL; + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding // is done in R600CodeEmitter -- cgit v1.1 From c7e1888d93f4cb2982266986f3af7e99df631fa1 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 23 Jan 2013 02:09:03 +0000 Subject: R600: Add a CONST_ADDRESS node to model constant buf read Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173221 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/R600Instructions.td | 11 ++++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 473dac4..309bcf5 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -413,5 +413,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(INTERP) NODE_NAME_CASE(INTERP_P0) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(CONST_ADDRESS) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index c7abaf6..9938c65 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -123,6 +123,7 @@ enum { INTERP, INTERP_P0, EXPORT, + CONST_ADDRESS, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 8a0c52d..d4fa3d6 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -470,7 +470,7 @@ def isR600toCayman : Predicate< "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; //===----------------------------------------------------------------------===// -// Interpolation Instructions +// R600 SDNodes 
//===----------------------------------------------------------------------===// def INTERP: SDNode<"AMDGPUISD::INTERP", @@ -481,6 +481,15 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> >; +def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>, + [SDNPMayLoad] +>; + +//===----------------------------------------------------------------------===// +// Interpolation Instructions +//===----------------------------------------------------------------------===// + let usesCustomInserter = 1 in { def input_perspective : AMDGPUShaderInst < (outs R600_Reg128:$dst), -- cgit v1.1 From 9f7818d9bdfce2e9c7a2cbe31490a135aa6d1211 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 23 Jan 2013 02:09:06 +0000 Subject: R600: rework handling of the constants Remove Cxxx registers, add new special register - "ALU_CONST" and new operand for each alu src - "sel". ALU_CONST is used to designate that the new operand contains the value to override src.sel, src.kc_bank, src.chan for constants in the driver. Patch by: Vadim Girlin Vincent Lejeune: - Use pointers for constants - Fold CONST_ADDRESS when possible Tom Stellard: - Give CONSTANT_BUFFER_0 its own address space - Use integer types for constant loads Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173222 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUTargetMachine.cpp | 1 + lib/Target/R600/AMDIL.h | 20 +++- lib/Target/R600/AMDILISelDAGToDAG.cpp | 84 +++++++++++++- lib/Target/R600/CMakeLists.txt | 1 + lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 24 ++++ lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 1 + lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 81 +++++++------- lib/Target/R600/R600Defines.h | 15 +++ lib/Target/R600/R600ISelLowering.cpp | 122 +++++++++++++++++++-- lib/Target/R600/R600ISelLowering.h | 1 + lib/Target/R600/R600InstrInfo.cpp | 18 +-- lib/Target/R600/R600Instructions.td | 112 +++++++++++++++---- lib/Target/R600/R600LowerConstCopy.cpp | 74 +++++++++++++ lib/Target/R600/R600RegisterInfo.cpp | 6 +- lib/Target/R600/R600RegisterInfo.td | 26 ++--- 16 files changed, 483 insertions(+), 104 deletions(-) create mode 100644 lib/Target/R600/R600LowerConstCopy.cpp (limited to 'lib') diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index c75ec24..1aa607f 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,6 +23,7 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600LowerConstCopy(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 26ac928..7b069e7 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -136,6 +136,7 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); + addPass(createR600LowerConstCopy(*TM)); addPass(&FinalizeMachineBundlesID); } else { addPass(createSILowerLiteralConstantsPass(*TM)); diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h index 4e577dc..b39fbdb 100644 --- a/lib/Target/R600/AMDIL.h +++ b/lib/Target/R600/AMDIL.h @@ -90,14 
+90,30 @@ namespace AMDGPUAS { enum AddressSpaces { PRIVATE_ADDRESS = 0, ///< Address space for private memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, ///< Address space for constant memory. + CONSTANT_ADDRESS = 2, ///< Address space for constant memory LOCAL_ADDRESS = 3, ///< Address space for local memory. REGION_ADDRESS = 4, ///< Address space for region memory. ADDRESS_NONE = 5, ///< Address space for unknown memory. PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI - LAST_ADDRESS = 9 + CONSTANT_BUFFER_0 = 9, + CONSTANT_BUFFER_1 = 10, + CONSTANT_BUFFER_2 = 11, + CONSTANT_BUFFER_3 = 12, + CONSTANT_BUFFER_4 = 13, + CONSTANT_BUFFER_5 = 14, + CONSTANT_BUFFER_6 = 15, + CONSTANT_BUFFER_7 = 16, + CONSTANT_BUFFER_8 = 17, + CONSTANT_BUFFER_9 = 18, + CONSTANT_BUFFER_10 = 19, + CONSTANT_BUFFER_11 = 20, + CONSTANT_BUFFER_12 = 21, + CONSTANT_BUFFER_13 = 22, + CONSTANT_BUFFER_14 = 23, + CONSTANT_BUFFER_15 = 24, + LAST_ADDRESS = 25 }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index d15ed39..567b3e2 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/CodeGen/SelectionDAG.h" #include #include @@ -45,6 +46,7 @@ public: private: inline SDValue getSmallIPtrImm(unsigned Imm); + bool FoldOperands(unsigned, const R600InstrInfo *, std::vector &); // Complex pattern selectors bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); @@ -67,6 +69,9 @@ private: static bool isLocalLoad(const LoadSDNode *N); static bool isRegionLoad(const LoadSDNode *N); + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); + bool SelectGlobalValueVariableOffset(SDValue Addr, + SDValue &BaseReg, SDValue& Offset); bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -259,7 +264,65 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } } - return SelectCode(N); + SDNode *Result = SelectCode(N); + + // Fold operands of selected node + + const AMDGPUSubtarget &ST = TM.getSubtarget(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + const R600InstrInfo *TII = + static_cast(TM.getInstrInfo()); + if (Result && TII->isALUInstr(Result->getMachineOpcode())) { + bool IsModified = false; + do { + std::vector Ops; + for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end(); + I != E; ++I) + Ops.push_back(*I); + IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops); + if (IsModified) { + Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(), + Result->getVTList(), Ops.data(), Ops.size()); + } + } while (IsModified); + } + } + + return Result; +} + +bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, + const R600InstrInfo *TII, std::vector &Ops) { + int OperandIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0), + TII->getOperandIdx(Opcode, R600Operands::SRC1), + TII->getOperandIdx(Opcode, R600Operands::SRC2) + }; + int SelIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL), + 
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL), + TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL) + }; + for (unsigned i = 0; i < 3; i++) { + if (OperandIdx[i] < 0) + return false; + SDValue Operand = Ops[OperandIdx[i] - 1]; + switch (Operand.getOpcode()) { + case AMDGPUISD::CONST_ADDRESS: { + SDValue CstOffset; + if (!Operand.getValueType().isVector() && + SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; + } + } + break; + default: + break; + } + } + return false; } bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { @@ -406,6 +469,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const { ///==== AMDGPU Functions ====/// +bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, + SDValue& IntPtr) { + if (ConstantSDNode *Cst = dyn_cast(Addr)) { + IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true); + return true; + } + return false; +} + +bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, + SDValue& BaseReg, SDValue &Offset) { + if (!dyn_cast(Addr)) { + BaseReg = Addr; + Offset = CurDAG->getIntPtrConstant(0, true); + return true; + } + return false; +} + bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset) { if (Addr.getOpcode() == ISD::TargetExternalSymbol || diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 790a4aa..a8be7ed 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -36,6 +36,7 @@ add_llvm_target(R600CodeGen R600ExpandSpecialInstrs.cpp R600InstrInfo.cpp R600ISelLowering.cpp + R600LowerConstCopy.cpp R600MachineFunctionInfo.cpp R600RegisterInfo.cpp SIAnnotateControlFlow.cpp diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index e6c550b..e76c6c8 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -129,4 +129,28 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const char * chans = "XYZW"; + int sel = MI->getOperand(OpNo).getImm(); + + int chan = sel & 3; + sel >>= 2; + + if (sel >= 512) { + sel -= 512; + int cb = sel >> 12; + sel &= 4095; + O << cb << "[" << sel << "]"; + } else if (sel >= 448) { + sel -= 448; + O << sel; + } else if (sel >= 0){ + O << sel; + } + + if (sel >= 0) + O << "." 
<< chans[chan]; +} + #include "AMDGPUGenAsmWriter.inc" diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 96e0e46..e775c4c 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -45,6 +45,7 @@ private: void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 36deae9..01df808 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -63,8 +63,8 @@ private: void EmitALUInstr(const MCInst &MI, SmallVectorImpl &Fixups, raw_ostream &OS) const; void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, - raw_ostream &OS) const; + void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, + raw_ostream &OS) const; void EmitDst(const MCInst &MI, raw_ostream &OS) const; void EmitTexInstr(const MCInst &MI, SmallVectorImpl &Fixups, raw_ostream &OS) const; @@ -163,7 +163,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::VTX_READ_PARAM_32_eg: case AMDGPU::VTX_READ_GLOBAL_8_eg: case AMDGPU::VTX_READ_GLOBAL_32_eg: - case AMDGPU::VTX_READ_GLOBAL_128_eg: { + case AMDGPU::VTX_READ_GLOBAL_128_eg: + case AMDGPU::TEX_VTX_CONSTBUF: { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset @@ -193,7 +194,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, SmallVectorImpl &Fixups, raw_ostream &OS) const { const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned NumOperands = MI.getNumOperands(); // Emit instruction type EmitByte(INSTR_ALU, OS); @@ -209,19 +209,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, InstWord01 |= ISAOpCode << 1; } - unsigned SrcIdx = 0; - for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) { - if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() || - OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) { - continue; - } - EmitSrcISA(MI, OpIdx, InstWord01, OS); - SrcIdx++; - } + unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : + MCDesc.TSFlags & R600_InstFlag::OP2 ? 
2 : 1; - // Emit zeros for unused sources - for ( ; SrcIdx < 3; SrcIdx++) { - EmitNullBytes(SRC_BYTE_COUNT - 6, OS); + EmitByte(SrcNum, OS); + + const unsigned SrcOps[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL} + }; + + for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { + unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; + unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; + EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); } Emit(InstWord01, OS); @@ -292,34 +294,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, } -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, - uint64_t &Value, raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, + unsigned SelOpIdx, raw_ostream &OS) const { + const MCOperand &RegMO = MI.getOperand(RegOpIdx); + const MCOperand &SelMO = MI.getOperand(SelOpIdx); + union { float f; uint32_t i; } InlineConstant; InlineConstant.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. + // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0 + // and select is 0 (GPR index is encoded in the instr encoding). For constants + // type is 1 and select is the original const select passed from the driver.
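(Illustrative aside, not part of the patch: the "sel" layout that EmitSrcISA writes here and printSel decodes above can be condensed into a standalone C++ sketch. The helper names are invented; the arithmetic comes from the printSel hunk and the kc_bank comments in LowerLOAD further down, and the ArrayBase (448-relative) case is omitted for brevity.)

#include <cassert>
#include <cstdio>

// Low 2 bits of sel pick the channel (X/Y/Z/W); the remaining bits hold
// 512 + (kc_bank << 12) + const_index for constant-buffer reads.
static unsigned encodeConstSel(unsigned KCBank, unsigned Index, unsigned Chan) {
  assert(KCBank < 16 && Index < 4096 && Chan < 4);
  return ((512 + (KCBank << 12) + Index) << 2) | Chan;
}

static void decodeSel(unsigned Sel) {
  const char Chans[] = "XYZW";
  unsigned Chan = Sel & 3;
  Sel >>= 2;
  if (Sel >= 512) // Constant-buffer read: printed as "kc_bank[index].chan".
    printf("%u[%u].%c\n", (Sel - 512) >> 12, (Sel - 512) & 4095, Chans[Chan]);
  else            // GPR or special value: the raw index.
    printf("%u.%c\n", Sel, Chans[Chan]);
}

int main() {
  decodeSel(encodeConstSel(1, 7, 2)); // prints "1[7].Z"
  return 0;
}

Worked through once: kc_bank 1, index 7, channel Z encodes as ((512 + 4096 + 7) << 2) | 2 = 18462, and the ConstantAddressBlock helper added below reproduces the same 512 + 4096 * kc_bank base per constant buffer.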
+ unsigned Reg = RegMO.getReg(); + if (Reg == AMDGPU::ALU_CONST) { + EmitByte(1, OS); + uint32_t Sel = SelMO.getImm(); + Emit(Sel, OS); + } else { + EmitByte(0, OS); + Emit((uint32_t)0, OS); + } - if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - InlineConstant.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - InlineConstant.i = ImmOp.getImm(); - } + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + InlineConstant.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + InlineConstant.i = ImmOp.getImm(); } } diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 7dea8e4..e19eea3 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -62,18 +62,33 @@ namespace R600Operands { SRC0_NEG, SRC0_REL, SRC0_ABS, + SRC0_SEL, SRC1, SRC1_NEG, SRC1_REL, SRC1_ABS, + SRC1_SEL, SRC2, SRC2_NEG, SRC2_REL, + SRC2_SEL, LAST, PRED_SEL, IMM, COUNT }; + + const static int ALUOpTable[3][R600Operands::COUNT] = { +// W C S S S S S S S S S S S +// R O D L S R R R R S R R R R S R R R L P +// D U I M R A R C C C C R C C C C R C C C A R I +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12}, + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19}, + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17} + }; + } #endif // R600DEFINES_H_ diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index f0eece3..69ca3f5 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -74,7 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setSchedulingPreference(Sched::VLIW); } @@ -115,15 +118,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::R600_LOAD_CONST: { - int64_t RegIndex = MI->getOperand(1).getImm(); - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) - .addOperand(MI->getOperand(0)) - .addReg(ConstantReg); - break; - } - case AMDGPU::MASK_WRITE: { unsigned maskedRegister = MI->getOperand(0).getReg(); assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); @@ -364,6 +358,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -527,6 +522,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + return; + case ISD::LOAD: { + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); + Results.push_back(SDValue(Node, 0)); + 
Results.push_back(SDValue(Node, 1)); + // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode + // function + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); + return; + } } } @@ -832,6 +837,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +// Returns 512 + (kc_bank << 12). +static int +ConstantAddressBlock(unsigned AddressSpace) { + switch (AddressSpace) { + case AMDGPUAS::CONSTANT_BUFFER_0: + return 512; + case AMDGPUAS::CONSTANT_BUFFER_1: + return 512 + 4096; + case AMDGPUAS::CONSTANT_BUFFER_2: + return 512 + 4096 * 2; + case AMDGPUAS::CONSTANT_BUFFER_3: + return 512 + 4096 * 3; + case AMDGPUAS::CONSTANT_BUFFER_4: + return 512 + 4096 * 4; + case AMDGPUAS::CONSTANT_BUFFER_5: + return 512 + 4096 * 5; + case AMDGPUAS::CONSTANT_BUFFER_6: + return 512 + 4096 * 6; + case AMDGPUAS::CONSTANT_BUFFER_7: + return 512 + 4096 * 7; + case AMDGPUAS::CONSTANT_BUFFER_8: + return 512 + 4096 * 8; + case AMDGPUAS::CONSTANT_BUFFER_9: + return 512 + 4096 * 9; + case AMDGPUAS::CONSTANT_BUFFER_10: + return 512 + 4096 * 10; + case AMDGPUAS::CONSTANT_BUFFER_11: + return 512 + 4096 * 11; + case AMDGPUAS::CONSTANT_BUFFER_12: + return 512 + 4096 * 12; + case AMDGPUAS::CONSTANT_BUFFER_13: + return 512 + 4096 * 13; + case AMDGPUAS::CONSTANT_BUFFER_14: + return 512 + 4096 * 14; + case AMDGPUAS::CONSTANT_BUFFER_15: + return 512 + 4096 * 15; + default: + return -1; + } +} + +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const +{ + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + LoadSDNode *LoadNode = cast(Op); + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); + if (ConstantBlock > -1) { + SDValue Result; + if (dyn_cast(LoadNode->getSrcValue()) || + dyn_cast(LoadNode->getSrcValue())) { + SDValue Slots[4]; + for (unsigned i = 0; i < 4; i++) { + // We want Const position encoded with the following formula : + // (((512 + (kc_bank << 12) + const_index) << 2) + chan) + // const_index is Ptr computed by llvm using an alignment of 16.
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and + // then div by 4 at the ISel step + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4); + } else { + // A non-constant ptr can't be folded; keep it as a v4f32 load + Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, + DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)) + ); + } + + if (!VT.isVector()) { + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, + DAG.getConstant(0, MVT::i32)); + } + + SDValue MergedValues[2] = { + Result, + Chain + }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + + return SDValue(); } SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { @@ -904,6 +997,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } break; } + // Extract_vec (Build_vector) generated by custom lowering + // also needs to be custom combined + case ISD::EXTRACT_VECTOR_ELT: { + SDValue Arg = N->getOperand(0); + if (Arg.getOpcode() == ISD::BUILD_VECTOR) { + if (ConstantSDNode *Const = dyn_cast(N->getOperand(1))) { + unsigned Element = Const->getZExtValue(); + return Arg->getOperand(Element); + } + } + } } return SDValue(); } diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 2b954da..c141d50 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -63,6 +63,7 @@ private: SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; bool isZero(SDValue Op) const; }; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 06b78d0..1adb142 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -486,13 +486,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB .addReg(Src0Reg) // $src0 .addImm(0) // $src0_neg .addImm(0) // $src0_rel - .addImm(0); // $src0_abs + .addImm(0) // $src0_abs + .addImm(-1); // $src0_sel if (Src1Reg) { MIB.addReg(Src1Reg) // $src1 .addImm(0) // $src1_neg .addImm(0) // $src1_rel - .addImm(0); // $src1_abs + .addImm(0) // $src1_abs + .addImm(-1); // $src1_sel } //XXX: The r600g finalizer expects this to be 1, once we've moved the @@ -521,16 +523,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, int R600InstrInfo::getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const { - const static int OpTable[3][R600Operands::COUNT] = { -// W C S S S S S S S S -// R O D L S R R R S R R R S R R L P -// D U I M R A R C C C C C C C R C C A R I -// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M -// T M P E D L P 0 N R A 1 N R A 2 N R T D M - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11}, - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} - }; unsigned TargetFlags = get(Opcode).TSFlags; unsigned OpTableIdx; @@ -556,7 +548,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode, OpTableIdx = 2; } - return OpTable[OpTableIdx][Op]; + return R600Operands::ALUOpTable[OpTableIdx][Op]; } void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, diff --git
a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index d4fa3d6..a6c3910 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -70,6 +70,11 @@ class InstFlag let PrintMethod = PM; } +// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers +def SEL : OperandWithDefaultOps { + let PrintMethod = "printSel"; +} + def LITERAL : InstFlag<"printLiteral">; def WRITE : InstFlag <"printWrite", 1>; @@ -89,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>; def ADDRParam : ComplexPattern; def ADDRDWord : ComplexPattern; def ADDRVTX_READ : ComplexPattern; +def ADDRGA_CONST_OFFSET : ComplexPattern; +def ADDRGA_VAR_OFFSET : ComplexPattern; class R600ALU_Word0 { field bits<32> Word0; @@ -263,11 +270,11 @@ class R600_1OP inst, string opName, list pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -303,13 +310,13 @@ class R600_2OP inst, string opName, list pattern, (outs R600_Reg32:$dst), (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " - "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " + "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " + "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -340,14 +347,14 @@ class R600_3OP inst, string opName, list pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$dst_rel, " - "$src0_neg$src0$src0_rel, " - "$src1_neg$src1$src1_rel, " - "$src2_neg$src2$src2_rel, " + "$src0_neg$src0$src0_sel$src0_rel, " + "$src1_neg$src1$src1_sel$src1_rel, " + "$src2_neg$src2$src2_sel$src2_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -482,7 +489,7 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", >; def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", - SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>, + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPMayLoad] >; @@ -1538,12 +1545,6 @@ def MASK_WRITE : AMDGPUShaderInst < } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -def R600_LOAD_CONST : AMDGPUShaderInst < - (outs R600_Reg32:$dst), - (ins i32imm:$src0), - "R600_LOAD_CONST $dst, $src0", - [(set R600_Reg32:$dst, 
(int_AMDGPU_load_const imm:$src0))] ->; def RESERVE_REG : AMDGPUShaderInst < (outs), @@ -1551,7 +1552,6 @@ def RESERVE_REG : AMDGPUShaderInst < "RESERVE_REG $src", [(int_AMDGPU_reserve_reg imm:$src)] >; - def TXD: AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), @@ -1581,6 +1581,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { "RETURN", [(IL_retflag)]>; } + +//===----------------------------------------------------------------------===// +// Constant Buffer Addressing Support +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +def CONST_COPY : Instruction { + let OutOperandList = (outs R600_Reg32:$dst); + let InOperandList = (ins i32imm:$src); + let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; + let AsmString = "CONST_COPY"; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let Itinerary = NullALU; +} +} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" + +def TEX_VTX_CONSTBUF : + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, + VTX_WORD1_GPR, VTX_WORD0 { + + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = 0; + let SRC_REL = 0; + let SRC_SEL_X = 0; + let DST_REL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 2; + let FORMAT_COMP_ALL = 1; + let SRF_MODE_ALL = 1; + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 35; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter) +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + + //===--------------------------------------------------------------------===// // Instructions support //===--------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp new file mode 100644 index 0000000..70a2b13 --- /dev/null +++ b/lib/Target/R600/R600LowerConstCopy.cpp @@ -0,0 +1,74 @@ +//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr. +/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot +/// fold them inside vector instructions, like DOT4 or Cube; ISel emits +/// ConstCopy instead.
This pass (executed after ExpandingSpecialInstr) will try +/// to fold them if possible or replace them with a MOV otherwise. +/// TODO: Implement the folding part, using a Copy Propagation algorithm. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/GlobalValue.h" + +namespace llvm { + +class R600LowerConstCopy : public MachineFunctionPass { +private: + static char ID; + const R600InstrInfo *TII; +public: + R600LowerConstCopy(TargetMachine &tm); + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; } +}; + +char R600LowerConstCopy::ID = 0; + + +R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : + MachineFunctionPass(ID), + TII (static_cast(tm.getInstrInfo())) +{ +} + +bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E;) { + MachineInstr &MI = *I; + I = llvm::next(I); + if (MI.getOpcode() != AMDGPU::CONST_COPY) + continue; + MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, + MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); + NewMI->getOperand(9).setImm(MI.getOperand(1).getImm()); + MI.eraseFromParent(); + } + } + return false; +} + +FunctionPass *createR600LowerConstCopy(TargetMachine &tm) { + return new R600LowerConstCopy(tm); +} + +} + + diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index a39f83d..0441e4a 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -38,16 +38,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::NEG_ONE); Reserved.set(AMDGPU::PV_X); Reserved.set(AMDGPU::ALU_LITERAL_X); + Reserved.set(AMDGPU::ALU_CONST); Reserved.set(AMDGPU::PREDICATE_BIT); Reserved.set(AMDGPU::PRED_SEL_OFF); Reserved.set(AMDGPU::PRED_SEL_ZERO); Reserved.set(AMDGPU::PRED_SEL_ONE); - for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { - Reserved.set(*I); - } - for (std::vector::const_iterator I = MFI->ReservedRegs.begin(), E = MFI->ReservedRegs.end(); I != E; ++I) { Reserved.set(*I); diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index d3d6d25..993fefc 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -27,10 +27,6 @@ foreach Index = 0-127 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { // 32-bit Temporary Registers def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; - - // 32-bit Constant Registers (There are more than 128, this the number - // that is currently supported.
- def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>; } // 128-bit Temporary Registers def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", @@ -64,13 +60,11 @@ def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "ArrayBase%u", 448, 464))>; - -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (interleave - (interleave (sequence "C%u_X", 0, 127), - (sequence "C%u_Z", 0, 127)), - (interleave (sequence "C%u_Y", 0, 127), - (sequence "C%u_W", 0, 127))))>; +// special registers for ALU src operands +// const buffer reference, SRCx_SEL contains index +def ALU_CONST : R600Reg<"CBuf", 0>; +// interpolation param reference, SRCx_SEL contains index +def ALU_PARAM : R600Reg<"Param", 0>; def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_X", 0, 127))>; @@ -85,15 +79,15 @@ def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_W", 0, 127))>; def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (interleave - (interleave R600_TReg32_X, R600_TReg32_Z), - (interleave R600_TReg32_Y, R600_TReg32_W)))>; + (interleave R600_TReg32_X, R600_TReg32_Y, + R600_TReg32_Z, R600_TReg32_W)>; def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_TReg32, - R600_CReg32, R600_ArrayBase, - ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; + ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF, + ALU_CONST, ALU_PARAM + )>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; -- cgit v1.1 From 28d65722d6f283b327b5815914382077fe9c0ab4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 06:14:59 +0000 Subject: Remove the last of uses that use the Attribute object as a collection of attributes. Collections of attributes are handled via the AttributeSet class now. This finally frees us up to make significant changes to how attributes are structured. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 13 ++++++++++ lib/IR/Core.cpp | 8 +++--- lib/IR/Function.cpp | 19 +++++--------- lib/Transforms/IPO/ArgumentPromotion.cpp | 29 +++++++++++++-------- lib/Transforms/IPO/DeadArgumentElimination.cpp | 27 +++++++++++++------- lib/Transforms/IPO/FunctionAttrs.cpp | 10 +++++--- lib/Transforms/InstCombine/InstCombineCalls.cpp | 34 +++++++++++++++---------- lib/Transforms/Utils/CloneFunction.cpp | 2 +- 8 files changed, 88 insertions(+), 54 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index a3abd36..4bd2391 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -541,6 +541,14 @@ AttributeWithIndex AttributeWithIndex::get(LLVMContext &C, unsigned Idx, // AttributeSetImpl Definition //===----------------------------------------------------------------------===// +AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { + // FIXME: Remove. + return AttrList && hasAttributes(Idx) ? + AttributeSet::get(AttrList->getContext(), + AttributeWithIndex::get(Idx, getAttributes(Idx))) : + AttributeSet(); +} + AttributeSet AttributeSet::getRetAttributes() const { // FIXME: Remove. return AttrList && hasAttributes(ReturnIndex) ? 
@@ -601,6 +609,11 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { return get(C, AttributeWithIndex::get(Idx, Attribute::get(C, B))); } +AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, + Attribute::AttrKind Kind) { + return get(C, AttributeWithIndex::get(Idx, Attribute::get(C, Kind))); +} + //===----------------------------------------------------------------------===// // AttributeSet Method Implementations //===----------------------------------------------------------------------===// diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 0e42536..1e3258f 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1467,13 +1467,13 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) { Argument *A = unwrap(Arg); AttrBuilder B(PA); - A->addAttr(Attribute::get(A->getContext(), B)); + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); } void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) { Argument *A = unwrap(Arg); AttrBuilder B(PA); - A->removeAttr(Attribute::get(A->getContext(), B)); + A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); } LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) { @@ -1484,10 +1484,10 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) { void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { + Argument *A = unwrap(Arg); AttrBuilder B; B.addAlignmentAttr(align); - unwrap(Arg)->addAttr(Attribute:: - get(unwrap(Arg)->getContext(), B)); + A->addAttr(AttributeSet::get(A->getContext(),A->getArgNo() + 1, B)); } /*--.. Operations on basic blocks ..........................................--*/ diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index f2f3ec9..839e496 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -123,23 +123,16 @@ bool Argument::hasStructRetAttr() const { hasAttribute(1, Attribute::StructRet); } -/// addAttr - Add a Attribute to an argument -void Argument::addAttr(Attribute attr) { - AttrBuilder B(attr); - getParent()->addAttributes(getArgNo() + 1, - AttributeSet::get(getParent()->getContext(), - getArgNo() + 1, B)); +/// addAttr - Add attributes to an argument. +void Argument::addAttr(AttributeSet AS) { + getParent()->addAttributes(getArgNo() + 1, AS); } -/// removeAttr - Remove a Attribute from an argument -void Argument::removeAttr(Attribute attr) { - AttrBuilder B(attr); - getParent()->removeAttributes(getArgNo() + 1, - AttributeSet::get(getParent()->getContext(), - getArgNo() + 1, B)); +/// removeAttr - Remove attributes from an argument. +void Argument::removeAttr(AttributeSet AS) { + getParent()->removeAttributes(getArgNo() + 1, AS); } - //===----------------------------------------------------------------------===// // Helper Methods in Function //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 39062e6..627012f 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -537,9 +537,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); - Attribute attrs = PAL.getParamAttributes(ArgIndex); - if (attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); + AttributeSet attrs = PAL.getParamAttributes(ArgIndex); + if (attrs.hasAttributes(ArgIndex)) { + AttributesVec. 
+ push_back(AttributeWithIndex::get(F->getContext(), + ArgIndex, attrs)); + AttributesVec.back().Index = Params.size(); + } } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; @@ -653,10 +657,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument - Attribute Attrs = CallPAL.getParamAttributes(ArgIndex); - if (Attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); - + if (CallPAL.hasAttributes(ArgIndex)) { + AttributesVec. + push_back(AttributeWithIndex::get(F->getContext(), ArgIndex, + CallPAL.getParamAttributes(ArgIndex))); + AttributesVec.back().Index = Args.size(); + } } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast(I->getType())->getElementType(); @@ -715,9 +721,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); - Attribute Attrs = CallPAL.getParamAttributes(ArgIndex); - if (Attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + if (CallPAL.hasAttributes(ArgIndex)) { + AttributesVec. + push_back(AttributeWithIndex::get(F->getContext(), ArgIndex, + CallPAL.getParamAttributes(ArgIndex))); + AttributesVec.back().Index = Args.size(); + } } // Add any function attributes. diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 5204248..3a38ca4 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -791,9 +791,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Get the original parameter attributes (skipping the first one, that is // for the return value. - Attribute Attrs = PAL.getParamAttributes(i + 1); - if (Attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); + if (PAL.hasAttributes(i + 1)) { + AttributesVec. + push_back(AttributeWithIndex::get(F->getContext(), i + 1, + PAL.getParamAttributes(i + 1))); + AttributesVec.back().Index = Params.size(); + } } else { ++NumArgumentsEliminated; DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() @@ -859,17 +862,23 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ArgAlive[i]) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. - Attribute Attrs = CallPAL.getParamAttributes(i + 1); - if (Attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + if (CallPAL.hasAttributes(i + 1)) { + AttributesVec. + push_back(AttributeWithIndex::get(F->getContext(), i + 1, + CallPAL.getParamAttributes(i + 1))); + AttributesVec.back().Index = Args.size(); + } } // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); - Attribute Attrs = CallPAL.getParamAttributes(i + 1); - if (Attrs.hasAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); + if (CallPAL.hasAttributes(i + 1)) { + AttributesVec. 
+ push_back(AttributeWithIndex::get(F->getContext(), i + 1, + CallPAL.getParamAttributes(i + 1))); + AttributesVec.back().Index = Args.size(); + } } if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 7e46dcb..a75212a 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -380,7 +380,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { - A->addAttr(Attribute::get(F->getContext(), B)); + A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } @@ -395,7 +395,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr(Attribute::get(F->getContext(), B)); + A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B)); ++NumNoCapture; Changed = true; } else { @@ -430,7 +430,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { ArgumentSCC[0]-> Definition-> - addAttr(Attribute::get(ArgumentSCC[0]->Definition->getContext(), B)); + addAttr(AttributeSet::get(ArgumentSCC[0]->Definition->getContext(), + ArgumentSCC[0]->Definition->getArgNo() + 1, + B)); ++NumNoCapture; Changed = true; } @@ -472,7 +474,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - A->addAttr(Attribute::get(A->getContext(), B)); + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6d4f188..2fd3549 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1044,14 +1044,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - Attribute Attrs = CallerPAL.getParamAttributes(i + 1); - if (AttrBuilder(Attrs). + if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). hasAttributes(Attribute::typeIncompatible(ParamTy))) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. - if (ParamTy != ActTy && Attrs.hasAttribute(Attribute::ByVal)) { + if (ParamTy != ActTy && + CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, + Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; @@ -1141,9 +1142,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. 
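(Illustrative aside: the hunks in ArgumentPromotion, DeadArgumentElimination, and the surrounding InstCombineCalls code all repeat one pattern -- copy an argument's attributes with getParamAttributes, rebuild an AttributeWithIndex, then retarget its Index at the argument's new position in the rebuilt call. A minimal sketch of that pattern against this commit's in-flux API, with an invented helper name:)

#include "llvm/IR/Attributes.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Keep the attributes of the argument at OldIdx (1-based), but point them
// at the argument's new position NewIdx in the rebuilt call.
static void keepArgAttributes(LLVMContext &C, const AttributeSet &PAL,
                              unsigned OldIdx, unsigned NewIdx,
                              SmallVectorImpl<AttributeWithIndex> &Vec) {
  if (!PAL.hasAttributes(OldIdx))
    return;
  Vec.push_back(AttributeWithIndex::get(C, OldIdx,
                                        PAL.getParamAttributes(OldIdx)));
  Vec.back().Index = NewIdx; // Retarget past dropped or added arguments.
}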
- Attribute PAttrs = CallerPAL.getParamAttributes(i + 1); + AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); if (PAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); + attrVec.push_back( + AttributeWithIndex::get(i + 1, + Attribute::get(FT->getContext(), PAttrs))); } // If the function takes more arguments than the call was taking, add them @@ -1168,9 +1171,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - Attribute PAttrs = CallerPAL.getParamAttributes(i + 1); + AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); if (PAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); + attrVec.push_back( + AttributeWithIndex::get(i + 1, + Attribute::get(FT->getContext(), PAttrs))); } } } @@ -1263,12 +1268,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; Type *NestTy = 0; - Attribute NestAttr; + AttributeSet NestAttr; // Look for a parameter marked with the 'nest' attribute. for (FunctionType::param_iterator I = NestFTy->param_begin(), E = NestFTy->param_end(); I != E; ++NestIdx, ++I) - if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attribute::Nest)){ + if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) { // Record the parameter type and any other attributes. NestTy = *I; NestAttr = NestAttrs.getParamAttributes(NestIdx); @@ -1302,7 +1307,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, if (NestVal->getType() != NestTy) NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); - NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); + NewAttrs.push_back(AttributeWithIndex::get(Caller->getContext(), + NestIdx, NestAttr)); } if (I == E) @@ -1310,10 +1316,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original argument and attributes. NewArgs.push_back(*I); - Attribute Attr = Attrs.getParamAttributes(Idx); - if (Attr.hasAttributes()) + AttributeSet Attr = Attrs.getParamAttributes(Idx); + if (Attr.hasAttributes(Idx)) { NewAttrs.push_back - (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); + (AttributeWithIndex::get(Caller->getContext(), Idx, Attr)); + NewAttrs.back().Index = Idx + (Idx >= NestIdx); + } ++Idx, ++I; } while (1); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1ba332b..12311c3 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -95,7 +95,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) if (Argument* Anew = dyn_cast(VMap[I])) - Anew->addAttr( OldFunc->getAttributes() + Anew->addAttr(OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() .addRetAttributes(NewFunc->getContext(), -- cgit v1.1 From 114baee1fa017daefad2339c77b45b9ca3d79a41 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 06:41:41 +0000 Subject: Add the IR attribute 'sspstrong'. SSPStrong applies a heuristic to insert stack protectors in these situations: * A Protector is required for functions which contain an array, regardless of type or length. * A Protector is required for functions which contain a structure/union which contains an array, regardless of type or length. Note, there is no limit to the depth of nesting. 
* A protector is required when the address of a local variable (i.e., stack based variable) is exposed. (E.g., such as through a local whose address is taken as part of the RHS of an assignment or a local whose address is taken as part of a function argument.) This patch implements the SSPStrong attribute to be equivalent to SSPRequired. This will change in a subsequent patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 11 +++++---- lib/AsmParser/LLToken.h | 1 + lib/CodeGen/StackProtector.cpp | 6 +++++ lib/IR/Attributes.cpp | 3 +++ lib/Target/CppBackend/CPPBackend.cpp | 1 + lib/Transforms/IPO/Inliner.cpp | 45 ++++++++++++++++++++++++++++-------- 7 files changed, 53 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 2efa1f0..85e7574 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -549,6 +549,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(optsize); KEYWORD(ssp); KEYWORD(sspreq); + KEYWORD(sspstrong); KEYWORD(noredzone); KEYWORD(noimplicitfloat); KEYWORD(naked); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index fea5ec8..0eb6023 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -956,6 +956,7 @@ bool LLParser::ParseOptionalFuncAttrs(AttrBuilder &B) { case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; + case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; @@ -1050,11 +1051,11 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_readonly: case lltok::kw_inlinehint: case lltok::kw_alwaysinline: case lltok::kw_optsize: case lltok::kw_ssp: case lltok::kw_sspreq: - case lltok::kw_noredzone: case lltok::kw_noimplicitfloat: - case lltok::kw_naked: case lltok::kw_nonlazybind: - case lltok::kw_address_safety: case lltok::kw_minsize: - case lltok::kw_alignstack: case lltok::kw_align: - case lltok::kw_noduplicate: + case lltok::kw_sspstrong: case lltok::kw_noimplicitfloat: + case lltok::kw_noredzone: case lltok::kw_naked: + case lltok::kw_nonlazybind: case lltok::kw_address_safety: + case lltok::kw_minsize: case lltok::kw_alignstack: + case lltok::kw_align: case lltok::kw_noduplicate: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 5b4d415..fea5f75 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -110,6 +110,7 @@ namespace lltok { kw_optsize, kw_ssp, kw_sspreq, + kw_sspstrong, kw_noredzone, kw_noimplicitfloat, kw_naked, diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index e242804..049efc1 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -141,6 +141,12 @@ bool StackProtector::RequiresStackProtector() const { Attribute::StackProtectReq)) return true; + // FIXME: Dummy SSP-strong implementation. Default to required until + // strong heuristic is implemented.
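(Illustrative aside, assuming the default 8-byte ssp-buffer-size: the hypothetical functions below show what the requirements quoted in the commit message above add. Plain ssp would normally instrument only f1; sspstrong instruments all three.)

extern void sink(char *);
extern void use(int *);

// f1: char buffer >= ssp-buffer-size -- protected under both ssp and sspstrong.
void f1() { char Buf[16]; sink(Buf); }
// f2: an array of any type and size -- protected under sspstrong only.
void f2() { int Tiny[2]; use(Tiny); }
// f3: a local whose address escapes -- protected under sspstrong only.
void f3() { int Local = 0; use(&Local); }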
+ if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) + return true; + if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackProtect)) return false; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 4bd2391..964a404 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -206,6 +206,8 @@ std::string Attribute::getAsString() const { Result += "ssp "; if (hasAttribute(Attribute::StackProtectReq)) Result += "sspreq "; + if (hasAttribute(Attribute::StackProtectStrong)) + Result += "sspstrong "; if (hasAttribute(Attribute::NoRedZone)) Result += "noredzone "; if (hasAttribute(Attribute::NoImplicitFloat)) @@ -487,6 +489,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::AddressSafety: return 1ULL << 32; case Attribute::MinSize: return 1ULL << 33; case Attribute::NoDuplicate: return 1ULL << 34; + case Attribute::StackProtectStrong: return 1ULL << 35; } llvm_unreachable("Unsupported attribute type"); } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index f468861..50bfef5 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -499,6 +499,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL, HANDLE_ATTR(OptimizeForSize); HANDLE_ATTR(StackProtect); HANDLE_ATTR(StackProtectReq); + HANDLE_ATTR(StackProtectStrong); HANDLE_ATTR(NoCapture); HANDLE_ATTR(NoRedZone); HANDLE_ATTR(NoImplicitFloat); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 2187a2a..663ddb7 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -72,6 +72,40 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { typedef DenseMap > InlinedArrayAllocasTy; +/// \brief If the inlined function had a higher stack protection level than the +/// calling function, then bump up the caller's stack protection level. +static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { + // If upgrading the SSP attribute, clear out the old SSP Attributes first. + // Having multiple SSP attributes doesn't actually hurt, but it adds useless + // clutter to the IR. + AttrBuilder B; + B.addAttribute(Attribute::StackProtect) + .addAttribute(Attribute::StackProtectStrong); + AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(), + AttributeSet::FunctionIndex, + B); + AttributeSet CallerAttr = Caller->getAttributes(), + CalleeAttr = Callee->getAttributes(); + + if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq)) { + Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); + Caller->addFnAttr(Attribute::StackProtectReq); + } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong) && + !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq)) { + Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); + Caller->addFnAttr(Attribute::StackProtectStrong); + } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtect) && + !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq) && + !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) + Caller->addFnAttr(Attribute::StackProtect); +} + /// InlineCallIfPossible - If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. 
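(Illustrative aside: AdjustCallerSSPLevel above is a maximum over the three-level ladder sspreq > sspstrong > ssp. A compact model, with invented names:)

enum SSPLevel { SSP_None = 0, SSP_On, SSP_Strong, SSP_Req };

// After inlining, the caller adopts the stronger of the two levels and is
// never downgraded, mirroring the chain of hasAttribute checks above.
static SSPLevel mergeSSPLevels(SSPLevel Caller, SSPLevel Callee) {
  return Caller >= Callee ? Caller : Callee;
}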
/// @@ -91,16 +125,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, if (!InlineFunction(CS, IFI, InsertLifetime)) return false; - // If the inlined function had a higher stack protection level than the - // calling function, then bump up the caller's stack protection level. - if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) - Caller->addFnAttr(Attribute::StackProtectReq); - else if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtect) && - !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) - Caller->addFnAttr(Attribute::StackProtect); + AdjustCallerSSPLevel(Caller, Callee); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, -- cgit v1.1 From e4957fb9b77a4fbdf711b9e5a722d107d86ccc50 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 06:43:53 +0000 Subject: Add the heuristic to differentiate SSPStrong from SSPRequired. The requirements of the strong heuristic are: * A Protector is required for functions which contain an array, regardless of type or length. * A Protector is required for functions which contain a structure/union which contains an array, regardless of type or length. Note, there is no limit to the depth of nesting. * A protector is required when the address of a local variable (i.e., stack based variable) is exposed. (E.g., such as through a local whose address is taken as part of the RHS of an assignment or a local whose address is taken as part of a function argument.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackProtector.cpp | 126 +++++++++++++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 049efc1..f3be37c 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -16,6 +16,8 @@ #define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/Dominators.h" #include "llvm/IR/Attributes.h" @@ -32,6 +34,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +STATISTIC(NumFunProtected, "Number of functions protected"); +STATISTIC(NumAddrTaken, "Number of local variables that have their address" + " taken."); + namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -43,6 +49,12 @@ namespace { DominatorTree *DT; + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet VisitedPHIs; + /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. /// @@ -58,11 +70,15 @@ namespace { /// ContainsProtectableArray - Check whether the type either is an array or /// contains an array of sufficient size so that we need stack protectors /// for it. 
- bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + bool ContainsProtectableArray(Type *Ty, bool Strong = false, + bool InStruct = false) const; + + /// \brief Check whether a stack allocation has its address taken. + bool HasAddressTaken(const Instruction *AI); /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. - bool RequiresStackProtector() const; + bool RequiresStackProtector(); public: static char ID; // Pass identification, replacement for typeid. StackProtector() : FunctionPass(ID), TLI(0) { @@ -96,15 +112,21 @@ bool StackProtector::runOnFunction(Function &Fn) { if (!RequiresStackProtector()) return false; + ++NumFunProtected; return InsertStackProtectors(); } /// ContainsProtectableArray - Check whether the type either is an array or /// contains a char array of sufficient size so that we need stack protectors /// for it. -bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { +bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, + bool InStruct) const { if (!Ty) return false; if (ArrayType *AT = dyn_cast(Ty)) { + // In strong mode any array, regardless of type and size, triggers a + // protector + if (Strong) + return true; const TargetMachine &TM = TLI->getTargetMachine(); if (!AT->getElementType()->isIntegerTy(8)) { Triple Trip(TM.getTargetTriple()); @@ -126,45 +148,103 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) - if (ContainsProtectableArray(*I, true)) + if (ContainsProtectableArray(*I, Strong, true)) return true; return false; } -/// RequiresStackProtector - Check whether or not this function needs a stack -/// protector based upon the stack protector level. The heuristic we use is to -/// add a guard variable to functions that call alloca, and functions with -/// buffers larger than SSPBufferSize bytes. -bool StackProtector::RequiresStackProtector() const { - if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) - return true; +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (const StoreInst *SI = dyn_cast(U)) { + if (AI == SI->getValueOperand()) + return true; + } else if (const PtrToIntInst *SI = dyn_cast(U)) { + if (AI == SI->getOperand(0)) + return true; + } else if (isa(U)) { + return true; + } else if (isa(U)) { + return true; + } else if (const SelectInst *SI = dyn_cast(U)) { + if (HasAddressTaken(SI)) + return true; + } else if (const PHINode *PN = dyn_cast(U)) { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + if (VisitedPHIs.insert(PN)) + if (HasAddressTaken(PN)) + return true; + } else if (const GetElementPtrInst *GEP = dyn_cast(U)) { + if (HasAddressTaken(GEP)) + return true; + } else if (const BitCastInst *BI = dyn_cast(U)) { + if (HasAddressTaken(BI)) + return true; + } + } + return false; +} - // FIXME: Dummy SSP-strong implementation. Default to required until - // strong heuristic is implemented. +/// \brief Check whether or not this function needs a stack protector based +/// upon the stack protector level. +/// +/// We use two heuristics: a standard (ssp) and strong (sspstrong). 
+/// The standard heuristic will add a guard variable to functions that
+/// call alloca with either a variable size or a size >= SSPBufferSize,
+/// functions with character buffers larger than SSPBufferSize, and functions
+/// with aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic will add a guard variable to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
+bool StackProtector::RequiresStackProtector() {
+  bool Strong = false;
   if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                      Attribute::StackProtectStrong))
+                                      Attribute::StackProtectReq))
     return true;
-
-  if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::StackProtect))
+  else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                           Attribute::StackProtectStrong))
+    Strong = true;
+  else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                            Attribute::StackProtect))
     return false;

   for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
     BasicBlock *BB = I;

     for (BasicBlock::iterator
-           II = BB->begin(), IE = BB->end(); II != IE; ++II)
+           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
       if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
-        if (AI->isArrayAllocation())
-          // This is a call to alloca with a variable size. Emit stack
-          // protectors.
+        if (AI->isArrayAllocation()) {
+          // SSP-Strong: Enable protectors for any call to alloca, regardless
+          // of size.
+          if (Strong)
+            return true;
+
+          if (const ConstantInt *CI =
+               dyn_cast<ConstantInt>(AI->getArraySize())) {
+            unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
+            if (CI->getLimitedValue(BufferSize) >= BufferSize)
+              // A call to alloca with size >= SSPBufferSize requires
+              // stack protectors.
+              return true;
+          } else // A call to alloca with a variable size requires protectors.
+            return true;
+        }
+
+        if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
           return true;

-        if (ContainsProtectableArray(AI->getAllocatedType()))
+        if (Strong && HasAddressTaken(AI)) {
+          ++NumAddrTaken;
           return true;
+        }
       }
+    }
   }

   return false;
-- cgit v1.1

From 70465616c29538e21e4dc3e0a70d8edb4b93bf5f Mon Sep 17 00:00:00 2001
From: Duncan Sands
Date: Wed, 23 Jan 2013 09:09:50 +0000
Subject: Initialize the components of this class. Otherwise GCC thinks that
 Array may be used uninitialized, since it fails to understand that Array is
 only used when SingleValue is not, and outputs a warning. It also seems
 generally safer given that the constructor is non-trivial and has plenty of
 early exits.
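
A minimal standalone sketch of the pattern, with hypothetical names (the
change below does the same in SwitchLookupTable's constructor):

    // GCC may warn that Table is "maybe uninitialized" even though it is
    // only read when UseTable is true; initializing every member in the
    // initializer list silences the warning and keeps the object in a
    // defined state across the constructor's early exits.
    struct Lookup {
      bool UseTable;
      int *Table;
      Lookup() : UseTable(false), Table(0) {}
    };
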
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173242 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f10c35f..0988a96 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3382,7 +3382,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
                                      ConstantInt *Offset,
             const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
                                      Constant *DefaultValue,
-                                     const DataLayout *TD) {
+                                     const DataLayout *TD)
+    : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
   assert(Values.size() && "Can't build lookup table without values!");
   assert(TableSize >= Values.size() && "Can't fit values in table!");
-- cgit v1.1

From 48a615fee78c3c262c60147a65dc6fff5fd6bb3b Mon Sep 17 00:00:00 2001
From: Kostya Serebryany
Date: Wed, 23 Jan 2013 12:54:55 +0000
Subject: [asan] use ADD instead of OR when applying shadow offset on PowerPC.
 See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55975 for details

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173258 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Instrumentation/AddressSanitizer.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8b6d64d..f4715f5 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -53,6 +53,7 @@ using namespace llvm;
 static const uint64_t kDefaultShadowScale = 3;
 static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
 static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;

 static const size_t kMaxStackMallocSize = 1 << 16;  // 64K
 static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
@@ -186,21 +187,29 @@ class SetOfDynamicallyInitializedGlobals {
 };

 /// This struct defines the shadow mapping using the rule:
-///   shadow = (mem >> Scale) + Offset.
+///   shadow = (mem >> Scale) ADD-or-OR Offset.
 struct ShadowMapping {
   int Scale;
   uint64_t Offset;
+  bool OrShadowOffset;
 };

 static ShadowMapping getShadowMapping(const Module &M, int LongSize,
                                       bool ZeroBaseShadow) {
   llvm::Triple TargetTriple(M.getTargetTriple());
   bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+  bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64;

   ShadowMapping Mapping;

+  // OR-ing the shadow offset is more efficient (at least on x86),
+  // but on ppc64 we have to use add since the shadow offset is not
+  // necessarily 1/8-th of the address space.
+  Mapping.OrShadowOffset = !IsPPC64;
+
   Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
-      (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64);
+      (LongSize == 32 ? kDefaultShadowOffset32 :
+       IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
   if (ClMappingOffsetLog >= 0) {
     // Zero offset log is the special case.
     Mapping.Offset = (ClMappingOffsetLog == 0) ?
0 : 1ULL << ClMappingOffsetLog; @@ -520,8 +529,10 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { if (Mapping.Offset == 0) return Shadow; // (Shadow >> scale) | offset - return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, - Mapping.Offset)); + if (Mapping.OrShadowOffset) + return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); + else + return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); } void AddressSanitizer::instrumentMemIntrinsicParam( -- cgit v1.1 From 03f7e727f41bc6e7ed4fd4423ae0c8bb04412219 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Wed, 23 Jan 2013 15:03:08 +0000 Subject: Make sure metarenamer won't rename special stuff (intrinsics and explicitly renamed stuff). Otherwise this might hide the problems. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/MetaRenamer.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index d519fb7..3716f58 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -72,13 +72,23 @@ namespace { // Rename all aliases for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); - AI != AE; ++AI) - AI->setName("alias"); + AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + AI->setName("alias"); + } + // Rename all global variables for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); - GI != GE; ++GI) + GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + GI->setName("global"); + } // Rename all struct types TypeFinder StructTypes; @@ -95,6 +105,10 @@ namespace { // Rename all functions for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { + StringRef Name = FI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); runOnFunction(*FI); } -- cgit v1.1 From e735945ad74a4af9772a7d9bb45418b2551fffe9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 23 Jan 2013 15:21:44 +0000 Subject: NVPTX: Stop leaking memory by using a managed constant instead of a new Argument. This is still an egregious hack since we don't have a nice interface for this kind of thing but should help the valgrind leak check buildbot to become green. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b3ab9fc..2699cea 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1027,9 +1027,11 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); - Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( - F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); + // Conjure up a value that we can get the address space from. + // FIXME: Using a constant here is a hack. 
+ Value *srcValue = Constant::getNullValue(PointerType::get( + ObjectVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, false, -- cgit v1.1 From e752feee5228bfa33acee35ef9c606ce12f0f173 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 23 Jan 2013 16:22:04 +0000 Subject: Clean up assignment of CalleeSaveStackSlotSize: get rid of the default and explicitly set this in every target that needs to change it from the default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173270 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAsmInfo.cpp | 2 +- lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 2 +- lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 5 +++-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 5 +++-- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 5 +++-- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 5 +++-- 6 files changed, 14 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index de1095b..51bb435 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; MCAsmInfo::MCAsmInfo() { PointerSize = 4; - CalleeSaveStackSlotSize = 0; // 0 means PointerSize is used in getter. + CalleeSaveStackSlotSize = 4; IsLittleEndian = true; StackGrowsUp = false; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index 2e328cb..3c95760 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; void MSP430MCAsmInfo::anchor() { } MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) { - PointerSize = 2; + PointerSize = CalleeSaveStackSlotSize = 2; PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index a679749..5d4b32d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -25,8 +25,9 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { IsLittleEndian = false; if ((TheTriple.getArch() == Triple::mips64el) || - (TheTriple.getArch() == Triple::mips64)) - PointerSize = 8; + (TheTriple.getArch() == Triple::mips64)) { + PointerSize = CalleeSaveStackSlotSize = 8; + } AlignmentIsInBytes = false; Data16bitsDirective = "\t.2byte\t"; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 1d41665..6191819 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -30,8 +30,9 @@ void NVPTXMCAsmInfo::anchor() { } NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::nvptx64) - PointerSize = 8; + if (TheTriple.getArch() == Triple::nvptx64) { + PointerSize = CalleeSaveStackSlotSize = 8; + } CommentString = "//"; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 215aa40..b048427 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -17,8 +17,9 @@ using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + 
if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; PCSymbol = "."; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index f5e10fc..3d4bfdc 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -21,8 +21,9 @@ void SparcELFMCAsmInfo::anchor() { } SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { IsLittleEndian = false; Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::sparcv9) - PointerSize = 8; + if (TheTriple.getArch() == Triple::sparcv9) { + PointerSize = CalleeSaveStackSlotSize = 8; + } Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; -- cgit v1.1 From e807d1ea1e05fea895ba90dd4da8c91026ba1f29 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 23 Jan 2013 17:12:15 +0000 Subject: Fix powerpc test failure - forgot to initialize stack slot size for PPCLinuxMCAsmInfo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173275 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index b048427..a25d7fe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -36,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { void PPCLinuxMCAsmInfo::anchor() { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; // ".comm align is in bytes but .align is pow-2." -- cgit v1.1 From 9381dd1ac9ac8a4020cd0dd03323a26f1ae5587f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 23 Jan 2013 17:16:22 +0000 Subject: InstCombine: Clean up weird code that talks about a modulus that's long gone. This does the right thing unless the multiplication overflows, but the old code didn't handle that case either. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173276 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstructionCombining.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index dc7fe5c..e9e05ce 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -758,12 +758,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; - // Handle hosts where % returns negative instead of values [0..TySize). - if (Offset < 0) { - --FirstIdx; - Offset += TySize; - assert(Offset >= 0); - } + assert(Offset >= 0 && "Offset should never be negative!"); assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } -- cgit v1.1 From 028dba376ae90c82b44d72ddb3ed97849484aab4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 23 Jan 2013 17:52:29 +0000 Subject: Revert "InstCombine: Clean up weird code that talks about a modulus that's long gone." This causes crashes during the build of compiler-rt during selfhost. Add a testcase for coverage. 
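
The restored adjustment matters because on most hosts signed division
truncates toward zero, so for a negative Offset the remainder
Offset - FirstIdx*TySize can itself be negative; stepping the index back one
element brings it into [0, TySize). A standalone sketch of that
normalization, with hypothetical names:

    #include <cassert>
    #include <cstdint>

    // Split Offset into an element index and a remainder in [0, ElemSize).
    static void splitOffset(int64_t Offset, int64_t ElemSize,
                            int64_t &Idx, int64_t &Rem) {
      Idx = Offset / ElemSize;        // truncates toward zero on most hosts
      Rem = Offset - Idx * ElemSize;  // may be negative for negative Offset
      if (Rem < 0) {                  // e.g. Offset = -5, ElemSize = 4:
        --Idx;                        //   Idx -1 -> -2, Rem -1 -> 3
        Rem += ElemSize;
      }
      assert(Rem >= 0 && Rem < ElemSize);
    }
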
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173279 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstructionCombining.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index e9e05ce..dc7fe5c 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -758,7 +758,12 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; - assert(Offset >= 0 && "Offset should never be negative!"); + // Handle hosts where % returns negative instead of values [0..TySize). + if (Offset < 0) { + --FirstIdx; + Offset += TySize; + assert(Offset >= 0); + } assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } -- cgit v1.1 From 9e6a5a37460ff82ad4e3a7aea1c45e2c934ab25b Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Wed, 23 Jan 2013 20:08:11 +0000 Subject: Add instruction encodings / disassembly support for l6r instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173288 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 25 ++++++++++++++++++++++ lib/Target/XCore/XCoreInstrFormats.td | 6 +++++- lib/Target/XCore/XCoreInstrInfo.td | 9 ++++---- 3 files changed, 34 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index e6861bf..73aeb9c 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -170,6 +170,11 @@ static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeL6RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -572,6 +577,26 @@ DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3, Op4, Op5, Op6; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S != MCDisassembler::Success) + return S; + S = Decode3OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5, Op6); + if (S != MCDisassembler::Success) + return S; + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op6, Address, Decoder); + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 29bc658..fa360a7 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -226,6 +226,10 @@ class _L5R pattern> : InstXCore<4, outs, ins, asmstr, pattern> { } -class _L6R pattern> +class _FL6R opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc; + let 
Inst{15-11} = 0b11111; + + let DecoderMethod = "DecodeL6RInstruction"; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index d193b45..65dbaef 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -502,11 +502,10 @@ def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), // Six operand long -def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, - GRRegs:$src4), - "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", - []>; +def LMUL_l6r : _FL6R< + 0b00000, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4), + "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>; // Register - U6 -- cgit v1.1 From b4d201ec544bbd3aec5e9feaec44df43b6b4bb6c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 23 Jan 2013 20:41:05 +0000 Subject: ConstantFolding: Evaluate GEP indices in the index type. This fixes some edge cases that we would get wrong with uint64_ts. PR14986. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173289 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index e2b1e25..95a68bf 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -254,13 +254,22 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, if (!CI) return false; // Index isn't a simple constant? if (CI->isZero()) continue; // Not adding anything. + // Evaluate offsets in the index type. + APInt APOffset(CI->getBitWidth(), Offset); + if (StructType *ST = dyn_cast(*GTI)) { // N = N + Offset - Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); + APOffset += + APInt(CI->getBitWidth(), + TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue())); } else { SequentialType *SQT = cast(*GTI); - Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); + APOffset += + APInt(CI->getBitWidth(), + TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue()); } + + Offset = APOffset.getSExtValue(); } return true; } -- cgit v1.1 From 1094b41c7b26089025822c5705b5c771f9b8cda4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 23 Jan 2013 21:21:24 +0000 Subject: ConstantFolding: Tweak r173289, it should evaluate in the intptr type, not the index type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173293 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 95a68bf..9246e26 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -218,10 +218,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// from a global, return the global and the constant. Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - int64_t &Offset, const DataLayout &TD) { + APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { - Offset = 0; + Offset.clearAllBits(); return true; } @@ -254,22 +254,17 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, if (!CI) return false; // Index isn't a simple constant? 
if (CI->isZero()) continue; // Not adding anything. - // Evaluate offsets in the index type. - APInt APOffset(CI->getBitWidth(), Offset); - if (StructType *ST = dyn_cast(*GTI)) { // N = N + Offset - APOffset += - APInt(CI->getBitWidth(), - TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue())); + Offset += + APInt(Offset.getBitWidth(), + TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue())); } else { SequentialType *SQT = cast(*GTI); - APOffset += - APInt(CI->getBitWidth(), - TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue()); + Offset += APInt(Offset.getBitWidth(), + TD.getTypeAllocSize(SQT->getElementType()) * + CI->getSExtValue()); } - - Offset = APOffset.getSExtValue(); } return true; } @@ -432,7 +427,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, if (BytesLoaded > 32 || BytesLoaded == 0) return 0; GlobalValue *GVal; - int64_t Offset; + APInt Offset(TD.getPointerSizeInBits(), 0); if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -443,14 +438,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. - if (Offset < 0) return 0; + if (Offset.isNegative()) return 0; // If we're not accessing anything in this constant, the result is undefined. - if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + if (Offset.getZExtValue() >= + TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; - if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) return 0; @@ -574,7 +570,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // constant. This happens frequently when iterating over a global array. 
if (Opc == Instruction::Sub && TD) { GlobalValue *GV1, *GV2; - int64_t Offs1, Offs2; + APInt Offs1(TD->getPointerSizeInBits(), 0), + Offs2(TD->getPointerSizeInBits(), 0); if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && -- cgit v1.1 From 2a3e0d7e76079289e2b007a15c311c51218f0b89 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 23 Jan 2013 21:39:47 +0000 Subject: R600: Simplify stream outputs intrinsic Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173296 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 32 ----------------------------- lib/Target/R600/R600Instructions.td | 24 +++++++++++----------- lib/Target/R600/R600Intrinsics.td | 2 +- lib/Target/R600/R600MachineFunctionInfo.cpp | 1 - lib/Target/R600/R600MachineFunctionInfo.h | 1 - 5 files changed, 13 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 69ca3f5..3434d7e 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -386,39 +386,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const Chain); } - case AMDGPUIntrinsic::R600_store_stream_output : { - MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo(); - int64_t RegIndex = cast(Op.getOperand(3))->getZExtValue(); - int64_t BufIndex = cast(Op.getOperand(4))->getZExtValue(); - - SDNode **OutputsMap = MFI->StreamOutputs[BufIndex]; - unsigned Inst; - switch (cast(Op.getOperand(4))->getZExtValue() ) { - // STREAM3 - case 3: - Inst = 4; - break; - // STREAM2 - case 2: - Inst = 3; - break; - // STREAM1 - case 1: - Inst = 2; - break; - // STREAM0 - case 0: - Inst = 1; - break; - default: - llvm_unreachable("Wrong buffer id for stream outputs !"); - } - return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2), - Chain); - } // default for switch(IntrinsicID) default: break; } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index a6c3910..3e069da 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -604,24 +604,24 @@ multiclass ExportPattern cf_inst> { multiclass SteamOutputExportPattern buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { // Stream0 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf0inst, 0)>; // Stream1 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 2), 
(i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; } diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index 3825bc4..06a7341 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -20,7 +20,7 @@ let TargetPrefix = "R600", isTarget = 1 in { def int_R600_load_input_linear : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; def int_R600_store_stream_output : - Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_pixel_color : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_R600_store_pixel_depth : diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 4eb5efa..bcb7f94 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -17,7 +17,6 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) HasLinearInterpolation(false), HasPerspectiveInterpolation(false) { memset(Outputs, 0, sizeof(Outputs)); - memset(StreamOutputs, 0, sizeof(StreamOutputs)); } unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index e97fb5b..91f9de2 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -25,7 +25,6 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); std::vector ReservedRegs; SDNode *Outputs[16]; - SDNode *StreamOutputs[64][4]; bool HasLinearInterpolation; bool HasPerspectiveInterpolation; -- cgit v1.1 From 254a83e46c0ffb08c5c77d99f64d6e86db550c6f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 23 Jan 2013 21:39:49 +0000 Subject: R600: Add a llvm.R600.store.swizzle intrinsics This intrinsic is translated to ALLOC_EXPORT_WORD1_SWIZ, hence its name. 
It is used to store vs/fs outputs Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173297 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 20 ++++++++++++++++++-- lib/Target/R600/R600Instructions.td | 11 +++++++++++ lib/Target/R600/R600Intrinsics.td | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 3434d7e..3dc5b00 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -269,8 +269,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: { + // Instruction is left unmodified if its not the last one of its type + bool isLastInstructionOfItsType = true; + unsigned InstExportType = MI->getOperand(1).getImm(); + for (MachineBasicBlock::iterator NextExportInst = llvm::next(I), + EndBlock = BB->end(); NextExportInst != EndBlock; + NextExportInst = llvm::next(NextExportInst)) { + if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || + NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { + unsigned CurrentInstExportType = NextExportInst->getOperand(1) + .getImm(); + if (CurrentInstExportType == InstExportType) { + isLastInstructionOfItsType = false; + break; + } + } + } bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; - if (!EOP) + if (!EOP && !isLastInstructionOfItsType) return BB; unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) @@ -282,7 +298,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(5)) .addOperand(MI->getOperand(6)) .addImm(CfInst) - .addImm(1); + .addImm(EOP); break; } } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 3e069da..04b83bc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -599,6 +599,17 @@ multiclass ExportPattern cf_inst> { (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, 0, 1, 2, 3, cf_inst, 0) >; + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 0, 1, 2, 3, cf_inst, 0) + >; + + def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase, + imm:$type), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 0, 1, 2, 3, cf_inst, 0) + >; } multiclass SteamOutputExportPattern; def int_R600_load_input_linear : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_store_swizzle : + Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_stream_output : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_pixel_color : -- cgit v1.1 From 2915a691b9eecde508948d4300428860d1655175 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 22:38:33 +0000 Subject: Remove dead methods. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173302 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 3 --- lib/IR/Attributes.cpp | 20 -------------------- 2 files changed, 23 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index b5d292e..0843fd8 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -48,10 +48,7 @@ public: bool hasAttributes() const; uint64_t getAlignment() const; - void setAlignment(unsigned Align); - uint64_t getStackAlignment() const; - void setStackAlignment(unsigned Align); bool operator==(Attribute::AttrKind Kind) const; bool operator!=(Attribute::AttrKind Kind) const; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 964a404..37cd3fb 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -76,12 +76,6 @@ unsigned Attribute::getAlignment() const { return 1U << ((pImpl->getAlignment() >> 16) - 1); } -void Attribute::setAlignment(unsigned Align) { - assert(hasAttribute(Attribute::Alignment) && - "Trying to set the alignment on a non-alignment attribute!"); - pImpl->setAlignment(Align); -} - /// This returns the stack alignment field of an attribute as a byte alignment /// value. unsigned Attribute::getStackAlignment() const { @@ -90,12 +84,6 @@ unsigned Attribute::getStackAlignment() const { return 1U << ((pImpl->getStackAlignment() >> 26) - 1); } -void Attribute::setStackAlignment(unsigned Align) { - assert(hasAttribute(Attribute::StackAlignment) && - "Trying to set the stack alignment on a non-alignment attribute!"); - pImpl->setStackAlignment(Align); -} - bool Attribute::operator==(AttrKind K) const { return pImpl && *pImpl == K; } @@ -506,18 +494,10 @@ uint64_t AttributeImpl::getAlignment() const { return Raw() & getAttrMask(Attribute::Alignment); } -void AttributeImpl::setAlignment(unsigned Align) { - Vals.push_back(ConstantInt::get(Type::getInt64Ty(Context), Align)); -} - uint64_t AttributeImpl::getStackAlignment() const { return Raw() & getAttrMask(Attribute::StackAlignment); } -void AttributeImpl::setStackAlignment(unsigned Align) { - Vals.push_back(ConstantInt::get(Type::getInt64Ty(Context), Align)); -} - void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, ArrayRef Vals) { ID.AddInteger(cast(Data)->getZExtValue()); -- cgit v1.1 From a8ab5fc772e1eaaa1066d1c9c4135ac875d79365 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 23 Jan 2013 23:00:05 +0000 Subject: Push down the conversion of the alignment from the bit mask to a real number into the attribute implementation class. 
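
The getters being simplified decode a packed bit field in which the
alignment is stored as log2(alignment) + 1, so a zero field means the
attribute is absent and decoding is a shift, a subtract, and an
exponentiation. A standalone sketch of the round trip; the bit-16 position
mirrors the getter below, while the 5-bit field width and the helper names
are assumptions:

    #include <cassert>
    #include <cstdint>

    // The field holds log2(Align) + 1 so that 0 means "no alignment".
    static uint64_t encodeAlign(uint64_t Align) { // Align is a power of two
      uint64_t Log2 = 0;
      while ((1ULL << Log2) < Align) ++Log2;
      return (Log2 + 1) << 16;
    }

    static uint64_t decodeAlign(uint64_t Raw) {
      uint64_t Field = (Raw >> 16) & 0x1f; // assumed 5-bit field width
      return Field ? 1ULL << (Field - 1) : 0;
    }

    int main() {
      assert(decodeAlign(encodeAlign(8)) == 8);   // stored as 4 << 16
      assert(decodeAlign(encodeAlign(16)) == 16); // stored as 5 << 16
      return 0;
    }
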
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173304 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 37cd3fb..94615da 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -73,7 +73,7 @@ bool Attribute::hasAttributes() const { unsigned Attribute::getAlignment() const { if (!hasAttribute(Attribute::Alignment)) return 0; - return 1U << ((pImpl->getAlignment() >> 16) - 1); + return pImpl->getAlignment(); } /// This returns the stack alignment field of an attribute as a byte alignment @@ -81,7 +81,7 @@ unsigned Attribute::getAlignment() const { unsigned Attribute::getStackAlignment() const { if (!hasAttribute(Attribute::StackAlignment)) return 0; - return 1U << ((pImpl->getStackAlignment() >> 26) - 1); + return pImpl->getStackAlignment(); } bool Attribute::operator==(AttrKind K) const { @@ -491,11 +491,13 @@ bool AttributeImpl::hasAttributes() const { } uint64_t AttributeImpl::getAlignment() const { - return Raw() & getAttrMask(Attribute::Alignment); + uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); + return 1U << ((Mask >> 16) - 1); } uint64_t AttributeImpl::getStackAlignment() const { - return Raw() & getAttrMask(Attribute::StackAlignment); + uint64_t Mask = Raw() & getAttrMask(Attribute::StackAlignment); + return 1U << ((Mask >> 26) - 1); } void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, -- cgit v1.1 From 3467e30edf63b6d8a8d446186674ba9e4b7885a9 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 24 Jan 2013 00:06:56 +0000 Subject: Create a new class: AttributeSetNode. This is a helper class for the AttributeSetImpl class. It holds a set of attributes that apply to a single element: function, return type, or parameter. These are uniqued. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173310 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 35 ++++++++++++++++++++++- lib/IR/Attributes.cpp | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/IR/LLVMContextImpl.h | 1 + 3 files changed, 107 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 0843fd8..b02cc8b 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -56,6 +56,8 @@ public: bool operator==(StringRef Kind) const; bool operator!=(StringRef Kind) const; + bool operator<(const AttributeImpl &AI) const; + uint64_t Raw() const; // FIXME: Remove. static uint64_t getAttrMask(Attribute::AttrKind Val); @@ -69,7 +71,38 @@ public: //===----------------------------------------------------------------------===// /// \class -/// \brief This class represents a set of attributes. +/// \brief This class represents a group of attributes that apply to one +/// element: function, return type, or parameter. 
+class AttributeSetNode : public FoldingSetNode { + SmallVector AttrList; + + AttributeSetNode(ArrayRef Attrs) + : AttrList(Attrs.begin(), Attrs.end()) {} +public: + static AttributeSetNode *get(LLVMContext &C, ArrayRef Attrs); + + typedef SmallVectorImpl::iterator iterator; + typedef SmallVectorImpl::const_iterator const_iterator; + + iterator begin() { return AttrList.begin(); } + iterator end() { return AttrList.end(); } + + const_iterator begin() const { return AttrList.begin(); } + const_iterator end() const { return AttrList.end(); } + + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, AttrList); + } + static void Profile(FoldingSetNodeID &ID, ArrayRef AttrList) { + for (unsigned I = 0, E = AttrList.size(); I != E; ++I) + AttrList[I].Profile(ID); + } +}; + +//===----------------------------------------------------------------------===// +/// \class +/// \brief This class represents a set of attributes that apply to the function, +/// return type, and parameters. class AttributeSetImpl : public FoldingSetNode { LLVMContext &Context; SmallVector AttrList; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 94615da..8623b98 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; //===----------------------------------------------------------------------===// @@ -91,6 +92,18 @@ bool Attribute::operator!=(AttrKind K) const { return !(*this == K); } +bool Attribute::operator<(Attribute A) const { + if (!pImpl && !A.pImpl) return false; + if (!pImpl) return true; + if (!A.pImpl) return false; + return *pImpl < *A.pImpl; +} + + +void Attribute::Profile(FoldingSetNodeID &ID) const { + ID.AddPointer(pImpl); +} + uint64_t Attribute::Raw() const { return pImpl ? pImpl->Raw() : 0; } @@ -431,10 +444,34 @@ bool AttributeImpl::operator==(StringRef Kind) const { return CDA->getAsString() == Kind; return false; } + bool AttributeImpl::operator!=(StringRef Kind) const { return !(*this == Kind); } +bool AttributeImpl::operator<(const AttributeImpl &AI) const { + if (!Data && !AI.Data) return false; + if (!Data && AI.Data) return true; + if (Data && !AI.Data) return false; + + ConstantInt *ThisCI = dyn_cast(Data); + ConstantInt *ThatCI = dyn_cast(AI.Data); + + ConstantDataArray *ThisCDA = dyn_cast(Data); + ConstantDataArray *ThatCDA = dyn_cast(AI.Data); + + if (ThisCI && ThatCI) + return ThisCI->getZExtValue() < ThatCI->getZExtValue(); + + if (ThisCI && ThatCDA) + return true; + + if (ThisCDA && ThatCI) + return false; + + return ThisCDA->getAsString() < ThatCDA->getAsString(); +} + uint64_t AttributeImpl::Raw() const { // FIXME: Remove this. return cast(Data)->getZExtValue(); @@ -523,6 +560,41 @@ AttributeWithIndex AttributeWithIndex::get(LLVMContext &C, unsigned Idx, } //===----------------------------------------------------------------------===// +// AttributeSetNode Definition +//===----------------------------------------------------------------------===// + +AttributeSetNode *AttributeSetNode::get(LLVMContext &C, + ArrayRef Attrs) { + if (Attrs.empty()) + return 0; + + // Otherwise, build a key to look up the existing attributes. 
+ LLVMContextImpl *pImpl = C.pImpl; + FoldingSetNodeID ID; + + SmallVector SortedAttrs(Attrs.begin(), Attrs.end()); + std::sort(SortedAttrs.begin(), SortedAttrs.end()); + + for (SmallVectorImpl::iterator I = SortedAttrs.begin(), + E = SortedAttrs.end(); I != E; ++I) + I->Profile(ID); + + void *InsertPoint; + AttributeSetNode *PA = + pImpl->AttrsSetNodes.FindNodeOrInsertPos(ID, InsertPoint); + + // If we didn't find any existing attributes of the same shape then create a + // new one and insert it. + if (!PA) { + PA = new AttributeSetNode(SortedAttrs); + pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint); + } + + // Return the AttributesListNode that we found or created. + return PA; +} + +//===----------------------------------------------------------------------===// // AttributeSetImpl Definition //===----------------------------------------------------------------------===// diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 30fd666..cc7ca5e 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -249,6 +249,7 @@ public: FoldingSet AttrsSet; FoldingSet AttrsLists; + FoldingSet AttrsSetNodes; StringMap MDStringCache; -- cgit v1.1 From 8b1f2f3b7007e1e086c0e20ad4fdea72222c4baf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 24 Jan 2013 00:14:46 +0000 Subject: Cleanup the AttributeSetNodes that we create. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173311 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/LLVMContextImpl.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 8fc9379..89e163f 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -109,6 +109,13 @@ LLVMContextImpl::~LLVMContextImpl() { delete &*Elem; } + // Destroy attribute node lists. + for (FoldingSetIterator I = AttrsSetNodes.begin(), + E = AttrsSetNodes.end(); I != E; ) { + FoldingSetIterator Elem = I++; + delete &*Elem; + } + // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. SmallVector MDNodes; -- cgit v1.1 From bb08593980b16fbd9758da6ca4fa9c7964f2f926 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 24 Jan 2013 01:01:34 +0000 Subject: Add a profile for uniquifying the AttributeSet with the AttributeSetNodes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173313 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 15 ++++++++++++++- lib/IR/Attributes.cpp | 5 ----- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index b02cc8b..b35e5e0 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -107,12 +107,17 @@ class AttributeSetImpl : public FoldingSetNode { LLVMContext &Context; SmallVector AttrList; + SmallVector, 4> AttrNodes; + // AttributesSet is uniqued, these should not be publicly available. 
void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; public: AttributeSetImpl(LLVMContext &C, ArrayRef attrs) : Context(C), AttrList(attrs.begin(), attrs.end()) {} + AttributeSetImpl(LLVMContext &C, + ArrayRef > attrs) + : Context(C), AttrNodes(attrs.begin(), attrs.end()) {} LLVMContext &getContext() { return Context; } ArrayRef getAttributes() const { return AttrList; } @@ -122,12 +127,20 @@ public: Profile(ID, AttrList); } static void Profile(FoldingSetNodeID &ID, - ArrayRef AttrList){ + ArrayRef AttrList) { for (unsigned i = 0, e = AttrList.size(); i != e; ++i) { ID.AddInteger(AttrList[i].Index); ID.AddInteger(AttrList[i].Attrs.Raw()); } } + + static void Profile(FoldingSetNodeID &ID, + ArrayRef > Nodes) { + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { + ID.AddInteger(Nodes[i].first); + ID.AddPointer(Nodes[i].second); + } + } }; } // end llvm namespace diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8623b98..8ee7057 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -99,11 +99,6 @@ bool Attribute::operator<(Attribute A) const { return *pImpl < *A.pImpl; } - -void Attribute::Profile(FoldingSetNodeID &ID) const { - ID.AddPointer(pImpl); -} - uint64_t Attribute::Raw() const { return pImpl ? pImpl->Raw() : 0; } -- cgit v1.1 From 66658dd9a1ffe00a5f6e0afca7afb16ec6704ed3 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 24 Jan 2013 02:09:55 +0000 Subject: MIsched: Added biasCriticalPath. Allow schedulers to order DAG edges by critical path. This makes DFS-based heuristics more stable and effective. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173317 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAG.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index e639c55..70ad949 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -301,6 +301,21 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +void SUnit::biasCriticalPath() { + if (NumPreds < 2) + return; + + SUnit::pred_iterator BestI = Preds.begin(); + unsigned MaxDepth = BestI->getSUnit()->getDepth(); + for (SUnit::pred_iterator + I = llvm::next(BestI), E = Preds.end(); I != E; ++I) { + if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) + BestI = I; + } + if (BestI != Preds.begin()) + std::swap(*Preds.begin(), *BestI); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. -- cgit v1.1 From db4170697f866dc8620946c77828ef0804996c3d Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 24 Jan 2013 02:09:57 +0000 Subject: MachineScheduler: enable biasCriticalPath for all DAGs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173318 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index c949266..b9198e8 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -563,6 +563,10 @@ void ScheduleDAGMI::releaseRoots() { for (std::vector::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { SUnit *SU = &(*I); + + // Order predecessors so DFSResult follows the critical path. 
+ SU->biasCriticalPath(); + // A SUnit is ready to top schedule if it has no predecessors. if (!I->NumPredsLeft && SU != &EntrySU) SchedImpl->releaseTopNode(SU); -- cgit v1.1 From 8453b3f66a3c3200ea828491ef5cf162db9ccfb2 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Thu, 24 Jan 2013 04:24:02 +0000 Subject: The next phase of Mips16 hard float implementation. Allow Mips16 routines to call Mips32 routines that have abi requirements that either arguments or return values are passed in floating point registers. This handles only the pic case. We have not done non pic for Mips16 yet in any form. The libm functions are Mips32, so with this addition we have a complete Mips16 hard float implementation. We still are not able to complete mix Mip16 and Mips32 with hard float. That will be the next phase which will have several steps. For Mips32 to freely call Mips16 some stub functions must be created. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173320 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 275 ++++++++++++++++++++++++++++++----- lib/Target/Mips/MipsISelLowering.h | 7 + 2 files changed, 246 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e148da1..a23ffd7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -11,8 +11,8 @@ // selection DAG. // //===----------------------------------------------------------------------===// - #define DEBUG_TYPE "mips-lower" +#include #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -205,39 +205,64 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +namespace { + struct eqstr { + bool operator()(const char *s1, const char *s2) const + { + return strcmp(s1, s2) == 0; + } + }; + + std::set noHelperNeeded; + + const char* addToNoHelperNeeded(const char* s) { + noHelperNeeded.insert(s); + return s; + } + +} + void MipsTargetLowering::setMips16HardFloatLibCalls() { - setLibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setLibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setLibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setLibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setLibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setLibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setLibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setLibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setLibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setLibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setLibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setLibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setLibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setLibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setLibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setLibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setLibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setLibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - 
setLibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
-  setLibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
-  setLibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
-  setLibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
-  setLibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
-  setLibcallName(RTLIB::O_F32, "__mips16_unordsf2");
-  setLibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+  setLibcallName(RTLIB::ADD_F32, addToNoHelperNeeded("__mips16_addsf3"));
+  setLibcallName(RTLIB::ADD_F64, addToNoHelperNeeded("__mips16_adddf3"));
+  setLibcallName(RTLIB::SUB_F32, addToNoHelperNeeded("__mips16_subsf3"));
+  setLibcallName(RTLIB::SUB_F64, addToNoHelperNeeded("__mips16_subdf3"));
+  setLibcallName(RTLIB::MUL_F32, addToNoHelperNeeded("__mips16_mulsf3"));
+  setLibcallName(RTLIB::MUL_F64, addToNoHelperNeeded("__mips16_muldf3"));
+  setLibcallName(RTLIB::DIV_F32, addToNoHelperNeeded("__mips16_divsf3"));
+  setLibcallName(RTLIB::DIV_F64, addToNoHelperNeeded("__mips16_divdf3"));
+  setLibcallName(RTLIB::FPEXT_F32_F64,
+                 addToNoHelperNeeded("__mips16_extendsfdf2"));
+  setLibcallName(RTLIB::FPROUND_F64_F32,
+                 addToNoHelperNeeded("__mips16_truncdfsf2"));
+  setLibcallName(RTLIB::FPTOSINT_F32_I32,
+                 addToNoHelperNeeded("__mips16_fix_truncsfsi"));
+  setLibcallName(RTLIB::FPTOSINT_F64_I32,
+                 addToNoHelperNeeded("__mips16_fix_truncdfsi"));
+  setLibcallName(RTLIB::SINTTOFP_I32_F32,
+                 addToNoHelperNeeded("__mips16_floatsisf"));
+  setLibcallName(RTLIB::SINTTOFP_I32_F64,
+                 addToNoHelperNeeded("__mips16_floatsidf"));
+  setLibcallName(RTLIB::UINTTOFP_I32_F32,
+                 addToNoHelperNeeded("__mips16_floatunsisf"));
+  setLibcallName(RTLIB::UINTTOFP_I32_F64,
+                 addToNoHelperNeeded("__mips16_floatunsidf"));
+  setLibcallName(RTLIB::OEQ_F32, addToNoHelperNeeded("__mips16_eqsf2"));
+  setLibcallName(RTLIB::OEQ_F64, addToNoHelperNeeded("__mips16_eqdf2"));
+  setLibcallName(RTLIB::UNE_F32, addToNoHelperNeeded("__mips16_nesf2"));
+  setLibcallName(RTLIB::UNE_F64, addToNoHelperNeeded("__mips16_nedf2"));
+  setLibcallName(RTLIB::OGE_F32, addToNoHelperNeeded("__mips16_gesf2"));
+  setLibcallName(RTLIB::OGE_F64, addToNoHelperNeeded("__mips16_gedf2"));
+  setLibcallName(RTLIB::OLT_F32, addToNoHelperNeeded("__mips16_ltsf2"));
+  setLibcallName(RTLIB::OLT_F64, addToNoHelperNeeded("__mips16_ltdf2"));
+  setLibcallName(RTLIB::OLE_F32, addToNoHelperNeeded("__mips16_lesf2"));
+  setLibcallName(RTLIB::OLE_F64, addToNoHelperNeeded("__mips16_ledf2"));
+  setLibcallName(RTLIB::OGT_F32, addToNoHelperNeeded("__mips16_gtsf2"));
+  setLibcallName(RTLIB::OGT_F64, addToNoHelperNeeded("__mips16_gtdf2"));
+  setLibcallName(RTLIB::UO_F32, addToNoHelperNeeded("__mips16_unordsf2"));
+  setLibcallName(RTLIB::UO_F64, addToNoHelperNeeded("__mips16_unorddf2"));
+  setLibcallName(RTLIB::O_F32, addToNoHelperNeeded("__mips16_unordsf2"));
+  setLibcallName(RTLIB::O_F64, addToNoHelperNeeded("__mips16_unorddf2"));
 }

 MipsTargetLowering::
@@ -2754,6 +2779,155 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
                       /*isVolatile=*/ true, false, 0);
 }

+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which according to the
+// ABI has either arguments or returned values placed in floating point
+// registers, we use a set of helper functions. (This includes functions whose
+// return type is complex, which on Mips is returned in a pair of floating
+// point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In Mips traditional O32, N32 ABI, floating point numbers are passed in
+// floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df);
+// the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// This suffix can then be calculated as follows:
+// for a given argument Arg:
+// Arg1x, Arg2x = 1 : Arg is sf
+//                2 : Arg is df
+//                0 : Arg is neither sf nor df
+// So this stub is the string for number Arg1x + Arg2x*4.
+// However, not all numbers between 0 and 10 are possible; we check anyway and
+// assert if the impossible occurs.
+//

+unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
+  (ArgListTy &Args) const {
+  unsigned int resultNum = 0;
+  if (Args.size() >= 1) {
+    Type *t = Args[0].Ty;
+    if (t->isFloatTy()) {
+      resultNum = 1;
+    }
+    else if (t->isDoubleTy()) {
+      resultNum = 2;
+    }
+  }
+  if (resultNum) {
+    if (Args.size() >=2) {
+      Type *t = Args[1].Ty;
+      if (t->isFloatTy()) {
+        resultNum += 4;
+      }
+      else if (t->isDoubleTy()) {
+        resultNum += 8;
+      }
+    }
+  }
+  return resultNum;
+}
+
+//
+// Prefixes are attached to stub numbers depending on the return type.
+// return type: float          sf_
+//              double         df_
+//              single complex sc_
+//              double complex dc_
+//              others         NO PREFIX
+//
+//
+// The full name of a helper function is __mips16_call_stub +
+// return type dependent prefix + stub number
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script to generate these mapping tables. This will do
+// for now. There are a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There are 11 possible
+// entries. Ones with 0 are ones which should never be selected.
+//
+// All the arrays are similar except for ones which return neither
+// sf, df, sc, dc, in which we only care about ones which have sf or df as a
+// first parameter.
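+// For example, a routine whose arguments are (df, sf) gets stub number
+// 2 + 1*4 = 6; if its return type is double the helper called is
+// __mips16_call_stub_df_6, and with no floating point return there is
+// no prefix and the helper is __mips16_call_stub_6.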
+// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* MipsTargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int maxStubNum = 10; + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); + assert(stubNum <= maxStubNum); + assert (validStubNum[stubNum]); + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. SDValue @@ -2770,6 +2944,26 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + const char* mips16HelperFunction = 0; + bool needMips16Helper = false; + + if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && + Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool lookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + if (noHelperNeeded.find(S->getSymbol()) != noHelperNeeded.end()) { + lookupHelper = false; + } + } + if (lookupHelper) mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, needMips16Helper); + + } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -2934,10 +3128,19 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { unsigned T9Reg = IsN64 ? 
Mips::T9_64 : Mips::T9; - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); + unsigned V0Reg = Mips::V0; + if (needMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol( + mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } + else { + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - if (!Subtarget->inMips16Mode()) - JumpTarget = SDValue(); + if (!Subtarget->inMips16Mode()) + JumpTarget = SDValue(); + } } // Insert node "GP copy globalreg" before call to function. diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index deb6ad0..c682c89 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -21,6 +21,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include +#include namespace llvm { namespace MipsISD { @@ -177,6 +178,12 @@ namespace llvm { void setMips16HardFloatLibCalls(); + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + /// ByValArgInfo - Byval argument information. struct ByValArgInfo { unsigned FirstIdx; // Index of the first register used. -- cgit v1.1 From b57c292d29f7fdc01a9cc06a4c99f1c3e37105f4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 24 Jan 2013 05:22:40 +0000 Subject: Remove trailing whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173322 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 268 ++++++++++++------------ 1 file changed, 134 insertions(+), 134 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0c0864f..653f97a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Scale = 0; return ConstantInt::get(Val->getType(), 0); } - + if (BinaryOperator *I = dyn_cast(Val)) { // Cannot look past anything that might overflow. OverflowingBinaryOperator *OBI = dyn_cast(Val); @@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Mul) { // This value is scaled by 'RHS'. Scale = RHS->getZExtValue(); Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, + // We have X+C. Check to see if we really have (X*C2)+C1, // where C1 is divisible by C2. unsigned SubScale; - Value *SubVal = + Value *SubVal = DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; @@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (!TD) return 0; PointerType *PTy = cast(CI.getType()); - + BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); @@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); - + // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. 
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || @@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Insert before the alloca, not before the cast. Amt = AllocaBuilder.CreateMul(Amt, NumElements); } - + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off); } - + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); - + // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. @@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that +/// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); break; - } + } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - + // Otherwise, must be the same type of cast, so just reinsert a new one. // This also handles the case of zext(trunc(x)) -> zext(x). Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, @@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Res = NPN; break; } - default: + default: // TODO: Can handle more cases here. llvm_unreachable("Unreachable!"); } - + Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. -static Instruction::CastOps +static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction @@ -253,7 +253,7 @@ isEliminableCastPair( if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) Res = 0; - + return Instruction::CastOps(Res); } @@ -265,18 +265,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa(V)) return false; - + // If this is another cast that can be eliminated, we prefer to have it // eliminated. if (const CastInst *CI = dyn_cast(V)) if (isEliminableCastPair(CI, opc, Ty, TD)) return false; - + // If this is a vector sext from a compare, then we don't want to break the // idiom where each element of the extended vector is either zero or all ones. 
if (opc == Instruction::SExt && isa(V) && Ty->isVectorTy()) return false; - + return true; } @@ -288,7 +288,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast(Src)) { // A->B->C cast - if (Instruction::CastOps opc = + if (Instruction::CastOps opc = isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. @@ -311,7 +311,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; } - + return 0; } @@ -330,15 +330,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa(V)) return true; - + Instruction *I = dyn_cast(V); if (!I) return false; - + Type *OrigTy = V->getType(); - + // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. - if ((isa(I) || isa(I)) && + if ((isa(I) || isa(I)) && I->getOperand(0)->getType() == Ty) return true; @@ -423,29 +423,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(), *SrcTy = Src->getType(); - + // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { - + // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -462,7 +462,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = 0; ConstantInt *Cst = 0; if (Src->hasOneUse() && @@ -472,7 +472,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // ASize < MidSize and MidSize > ResultSize, but don't know the relation // between ASize and ResultSize. unsigned ASize = A->getType()->getPrimitiveSizeInBits(); - + // If the shift amount is larger than the size of A, then the result is // known to be zero because all the input bits got shifted out. if (Cst->getZExtValue() >= ASize) @@ -485,7 +485,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), false); } - + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. if (Src->hasOneUse() && isa(Src->getType()) && @@ -508,7 +508,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // cast to integer to avoid the comparison. 
if (ConstantInt *Op1C = dyn_cast(ICI->getOperand(1))) { const APInt &Op1CV = Op1C->getValue(); - + // zext (x x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || @@ -538,14 +538,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); - + APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoXform) return ICI; @@ -559,7 +559,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } - + uint32_t ShiftAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); if (ShiftAmt) { @@ -568,12 +568,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), In->getName()+".lobit"); } - + if ((Op1CV != 0) == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } - + if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); @@ -646,19 +646,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa(V)) return true; - + Instruction *I = dyn_cast(V); if (!I) return false; - + // If the input is a truncate from the destination type, we can trivially // eliminate it. if (isa(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + unsigned Opc = I->getOpcode(), Tmp; switch (Opc) { case Instruction::ZExt: // zext(zext(x)) -> zext(x). @@ -678,7 +678,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) return true; - + // If the operation is an AND/OR/XOR and the bits to clear are zero in the // other side, BitsToClear is ok. if (Tmp == 0 && @@ -691,10 +691,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { APInt::getHighBitsSet(VSize, BitsToClear))) return true; } - + // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; - + case Instruction::LShr: // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. @@ -716,7 +716,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { Tmp != BitsToClear) return false; return true; - + case Instruction::PHI: { // We can change a phi if we can change all operands. 
Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -743,44 +743,44 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa(CI.use_back())) return 0; - + // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) return Result; - // See if we can simplify any instructions used by the input whose sole + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - + // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); - + // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid zero extend: " << CI); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); - + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // If the high bits are already filled with zeros, just replace this // cast with the result. if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, DestBitSize-SrcBitsKept))) return ReplaceInstUsesWith(CI, Res); - + // We need to emit an AND to clear the high bits. Constant *C = ConstantInt::get(Res->getType(), APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); @@ -792,7 +792,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // 'and' which will be much cheaper than the pair of casts. if (TruncInst *CSrc = dyn_cast(Src)) { // A->B->C cast // TODO: Subsume this into EvaluateInDifferentType. - + // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. Value *A = CSrc->getOperand(0); @@ -809,7 +809,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } - + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), @@ -818,7 +818,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize > DstSize) { Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), AndValue)); } @@ -876,7 +876,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *New = Builder->CreateZExt(X, CI.getType()); return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - + return 0; } @@ -989,14 +989,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // If this is a constant, it can be trivially promoted. 
if (isa(V)) return true; - + Instruction *I = dyn_cast(V); if (!I) return false; - + // If this is a truncate from the dest type, we can trivially eliminate it. if (isa(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; @@ -1015,14 +1015,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // These operators can all arbitrarily be extended if their inputs can. return CanEvaluateSExtd(I->getOperand(0), Ty) && CanEvaluateSExtd(I->getOperand(1), Ty); - + //case Instruction::Shl: TODO //case Instruction::LShr: TODO - + case Instruction::Select: return CanEvaluateSExtd(I->getOperand(1), Ty) && CanEvaluateSExtd(I->getOperand(2), Ty); - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -1036,7 +1036,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } @@ -1045,15 +1045,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa(CI.use_back())) return 0; - + if (Instruction *I = commonCastTransforms(CI)) return I; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); @@ -1076,7 +1076,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // cast with the result. if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) return ReplaceInstUsesWith(CI, Res); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), @@ -1089,7 +1089,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) { uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); @@ -1125,7 +1125,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } - + return 0; } @@ -1147,7 +1147,7 @@ static Value *LookThroughFPExtensions(Value *V) { if (Instruction *I = dyn_cast(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0)); - + // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. @@ -1166,14 +1166,14 @@ static Value *LookThroughFPExtensions(Value *V) { return V; // Don't try to shrink to various long double types. 
} - + return V; } Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are // smaller than the destination type, we can eliminate the truncate by doing // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well @@ -1190,7 +1190,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && + if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of @@ -1202,7 +1202,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } - break; + break; } // (fptrunc (fneg x)) -> (fneg (fptrunc x)) @@ -1246,7 +1246,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1254,15 +1254,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); - - + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); EraseInstFromFunction(*Call); return ret; } } - + return 0; } @@ -1280,7 +1280,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa(OpI) || isa(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ @@ -1294,19 +1294,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast(FI.getOperand(0)); if (OpI == 0) return commonCastTransforms(FI); - + // fptosi(sitofp(X)) --> X // fptosi(uitofp(X)) --> X // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. 
if ((isa(OpI) || isa(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - + return commonCastTransforms(FI); } @@ -1336,7 +1336,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { return new IntToPtrInst(P, CI.getType()); } } - + if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1346,19 +1346,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - + if (GetElementPtrInst *GEP = dyn_cast(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! if (GEP->hasAllZeroIndices()) { // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another + // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } - + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other @@ -1379,15 +1379,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Builder->CreateInBoundsGEP(OrigBase, NewIndices) : Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); - + if (isa(CI)) return new BitCastInst(NGEP, CI.getType()); assert(isa(CI)); return new PtrToIntInst(NGEP, CI.getType()); - } + } } } - + return commonCastTransforms(CI); } @@ -1407,7 +1407,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { return new ZExtInst(P, CI.getType()); } } - + return commonPointerCastTransforms(CI); } @@ -1422,33 +1422,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. VectorType *SrcTy = cast(InVal->getType()); - + if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the // same size. There is no specific reason we couldn't handle things like // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten - // there yet. + // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) return 0; - + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); } - + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. SmallVector ShuffleMask; Value *V2; - + if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. 
V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) ShuffleMask.push_back(i); - + } else { // If we're increasing the number of elements, shuffle in all of the // elements from InVal and fill the rest of the result elements with zeros @@ -1462,7 +1462,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) ShuffleMask.push_back(SrcElts); } - + return new ShuffleVectorInst(InVal, V2, ConstantDataVector::get(V2->getContext(), ShuffleMask)); @@ -1489,7 +1489,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa(V)) return true; - + // If we got down to a value of the right type, we win, try inserting into the // right element. if (V->getType() == VecEltTy) { @@ -1497,15 +1497,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (Constant *C = dyn_cast(V)) if (C->isNullValue()) return true; - + // Fail if multiple elements are inserted into this slot. if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) return false; - + Elements[ElementIndex] = V; return true; } - + if (Constant *C = dyn_cast(V)) { // Figure out the # elements this provides, and bitcast it or slice it up // as required. @@ -1516,7 +1516,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), ElementIndex, Elements, VecEltTy); - + // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. if (!isa(C->getType())) @@ -1524,7 +1524,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); - + for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), i*ElementSize)); @@ -1534,23 +1534,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, } return true; } - + if (!V->hasOneUse()) return false; - + Instruction *I = dyn_cast(V); if (I == 0) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::Or: return CollectInsertionElements(I->getOperand(0), ElementIndex, Elements, VecEltTy) && @@ -1562,11 +1562,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (CI == 0) return false; if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, Elements, VecEltTy); } - + } } @@ -1601,11 +1601,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] == 0) continue; // Unset element. 
- + Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); } - + return Result; } @@ -1633,11 +1633,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); } } - + // bitcast(trunc(lshr(bitcast(somevector), cst)) ConstantInt *ShAmt = 0; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), @@ -1654,7 +1654,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + unsigned Elt = ShAmt->getZExtValue() / DestWidth; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1678,12 +1678,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { PointerType *SrcPTy = cast(SrcTy); Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); - + // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. // There is no need to modify malloc calls because it is their bitcast that @@ -1691,14 +1691,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (AllocaInst *AI = dyn_cast(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; - + // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; - while (SrcElTy != DstElTy && + while (SrcElTy != DstElTy && isa(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast(SrcElTy)->getTypeAtIndex(ZeroUInt); @@ -1711,7 +1711,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return GetElementPtrInst::CreateInBounds(Src, Idxs); } } - + // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa(SrcTy)) if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) @@ -1724,7 +1724,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - + if (isa(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all @@ -1737,7 +1737,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { cast(DestTy), *this)) return I; } - + // If the input is an 'or' instruction, we may be doing shifts and ors to // assemble the elements of the vector manually. Try to rip the code out // and replace it with insertelements. 
@@ -1748,7 +1748,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (VectorType *SrcVTy = dyn_cast(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = + Value *Elem = Builder->CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); return CastInst::Create(Instruction::BitCast, Elem, DestTy); @@ -1758,7 +1758,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1767,9 +1767,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. - if (((Tmp = dyn_cast(SVI->getOperand(0))) && + if (((Tmp = dyn_cast(SVI->getOperand(0))) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast(SVI->getOperand(1))) && + ((Tmp = dyn_cast(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); @@ -1779,7 +1779,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } } - + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); -- cgit v1.1 From d5a336cdb5ed691b0288c8d4aa4c5b1899b7e39b Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 24 Jan 2013 05:47:29 +0000 Subject: MipsISelLowering.cpp: Fix a warning. [-Wunused-variable] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173323 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a23ffd7..c01e852 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2892,11 +2892,13 @@ const char* MipsTargetLowering:: getMips16HelperFunction (Type* RetTy, ArgListTy &Args, bool &needHelper) const { const unsigned int maxStubNum = 10; - const bool validStubNum[maxStubNum+1] = - {true, true, true, false, false, true, true, false, false, true, true}; const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); assert(stubNum <= maxStubNum); - assert (validStubNum[stubNum]); +#ifdef NDEBUG + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif const char *result; if (RetTy->isFloatTy()) { result = sfMips16Helper[stubNum]; -- cgit v1.1 From 00cdf602ae73e039c5d5244bae4bffb5e6455096 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 24 Jan 2013 05:54:23 +0000 Subject: MipsISelLowering.cpp: Fix a warning, take two. 
[-Wunused-variable] ...and fix a typo, s/#ifdef/#ifndef/ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173324 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c01e852..5399b45 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2891,10 +2891,10 @@ static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = const char* MipsTargetLowering:: getMips16HelperFunction (Type* RetTy, ArgListTy &Args, bool &needHelper) const { - const unsigned int maxStubNum = 10; const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; assert(stubNum <= maxStubNum); -#ifdef NDEBUG const bool validStubNum[maxStubNum+1] = {true, true, true, false, false, true, true, false, false, true, true}; assert(validStubNum[stubNum]); -- cgit v1.1 From b3105b9a9bb318672364b3d63e07b6325c3be3d7 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 24 Jan 2013 06:08:06 +0000 Subject: MipsISelLowering.cpp: Fill unreachable paths to fix warnings. [-Wsometimes-uninitialized] FIXME: Could they, unreachable(s), be removed? FIXME: I could prefer the coding standards... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173325 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5399b45..f37b652f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2917,6 +2917,12 @@ const char* MipsTargetLowering:: (RetTy->getContainedType(1)->isDoubleTy())) { result = dcMips16Helper[stubNum]; } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); } } else { -- cgit v1.1 From c61bc7a90c65bf220a113f31ea26f45e0f4c1413 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Jan 2013 08:05:06 +0000 Subject: Spiff up the comment on this method, making the example a bit more pretty in doxygen, adding some of the details actually present in a classic example where this matters (a loop from gzip and many other compression algorithms), and a cautionary note about the risks inherent in the transform. This has come up on the mailing lists recently, and I suspect folks reading this code could benefit from going and looking at the MI pass that can really deal with these issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173329 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 51 +++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 0988a96..21d156d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1332,24 +1332,43 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } -/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 -/// and an BB2 and the only successor of BB1 is BB2, hoist simple code -/// (for now, restricted to a single instruction that's side effect free) from -/// the BB1 into the branch block to speculatively execute it. 
+/// \brief Speculate a conditional basic block flattening the CFG.
 ///
-/// Turn
-/// BB:
-///     %t1 = icmp
-///     br i1 %t1, label %BB1, label %BB2
-/// BB1:
-///     %t3 = add %t2, c
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should do them directly. An
+/// example of this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations such as are common in
+/// instcombine and the DAG combiner more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+///   %cmp = icmp ult %x, %y
+///   br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+///   %sub = sub %x, %y
-///     br label BB2
-/// BB2:
-/// =>
-/// BB:
-///     %t1 = icmp
-///     %t4 = add %t2, c
-///     %t3 = select i1 %t1, %t2, %t3
+///   br label %EndBB
+/// EndBB:
+///   %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+///   ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+///   %cmp = icmp ult %x, %y
+///   %sub = sub %x, %y
+///   %cond = select i1 %cmp, 0, %sub
+///   ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
 static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
   // Only speculatively execute a single instruction (not counting the
-- cgit v1.1


From 9e620952617b90992e50dcf9bca8078c535cbaef Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Thu, 24 Jan 2013 08:22:40 +0000
Subject: Lift a cheap early exit test above loops and other complex early
 exit tests.

No need to pay the high cost when we're never going to do anything.

No functionality changed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173331 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 21d156d..fc84c4a 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1370,6 +1370,11 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
 ///
 /// \returns true if the conditional block is removed.
 static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
+  // Be conservative for now. FP select instruction can often be expensive.
+  Value *BrCond = BI->getCondition();
+  if (isa<FCmpInst>(BrCond))
+    return false;
+
   // Only speculatively execute a single instruction (not counting the
   // terminator) for now.
   Instruction *HInst = NULL;
@@ -1409,11 +1414,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
     }
   }
 
-  // Be conservative for now. FP select instruction can often be expensive.
-  Value *BrCond = BI->getCondition();
-  if (isa<FCmpInst>(BrCond))
-    return false;
-
   // If BB1 is actually on the false edge of the conditional branch, remember
   // to swap the select operands later.
   bool Invert = false;
-- cgit v1.1


From 6cf7a91d12fe208a41223daa4aa9c603c390e0ce Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Thu, 24 Jan 2013 09:59:39 +0000
Subject: Give the basic block variables here names based on the if-then-end
 structure being analyzed.

No functionality changed.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173334 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 65 ++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index fc84c4a..29e1180 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1369,17 +1369,29 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) return false; + BasicBlock *BB = BI->getParent(); + BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); + + // If ThenBB is actually on the false edge of the conditional branch, remember + // to swap the select operands later. + bool Invert = false; + if (ThenBB != BI->getSuccessor(0)) { + assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); + Invert = true; + } + assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); + // Only speculatively execution a single instruction (not counting the // terminator) for now. Instruction *HInst = NULL; - Instruction *Term = BB1->getTerminator(); - for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); + Instruction *Term = ThenBB->getTerminator(); + for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = ThenBB->end(); BBI != BBE; ++BBI) { Instruction *I = BBI; // Skip debug info. @@ -1391,8 +1403,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { HInst = I; } - BasicBlock *BIParent = BI->getParent(); - // Check the instruction to be hoisted, if there is one. if (HInst) { // Don't hoist the instruction if it's unsafe or expensive. @@ -1407,35 +1417,26 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast(*i); - if (OpI && OpI->getParent() == BIParent && + if (OpI && OpI->getParent() == BB && !OpI->mayHaveSideEffects() && - !OpI->isUsedInBasicBlock(BIParent)) + !OpI->isUsedInBasicBlock(BB)) return false; } } - // If BB1 is actually on the false edge of the conditional branch, remember - // to swap the select operands later. - bool Invert = false; - if (BB1 != BI->getSuccessor(0)) { - assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?"); - Invert = true; - } - // Collect interesting PHIs, and scan for hazards. SmallSetVector, 4> PHIs; - BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0); - for (BasicBlock::iterator I = BB2->begin(); + for (BasicBlock::iterator I = EndBB->begin(); PHINode *PN = dyn_cast(I); ++I) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BIParentV = PN->getIncomingValueForBlock(BIParent); + Value *OrigV = PN->getIncomingValueForBlock(BB); + Value *ThenV = PN->getIncomingValueForBlock(ThenBB); // Skip PHIs which are trivial. - if (BB1V == BIParentV) + if (ThenV == OrigV) continue; // Check for safety. - if (ConstantExpr *CE = dyn_cast(BB1V)) { + if (ConstantExpr *CE = dyn_cast(ThenV)) { // An unfolded ConstantExpr could end up getting expanded into // Instructions. Don't speculate this and another instruction at // the same time. 
@@ -1448,7 +1449,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { } // Ok, we may insert a select for this PHI. - PHIs.insert(std::make_pair(BB1V, BIParentV)); + PHIs.insert(std::make_pair(ThenV, OrigV)); } // If there are no PHIs to process, bail early. This helps ensure idempotence @@ -1457,11 +1458,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { return false; // If we get here, we can hoist the instruction and if-convert. - DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); + DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); // Hoist the instruction. if (HInst) - BIParent->getInstList().splice(BI, BB1->getInstList(), HInst); + BB->getInstList().splice(BI, ThenBB->getInstList(), HInst); // Insert selects and rewrite the PHI operands. IRBuilder Builder(BI); @@ -1483,15 +1484,15 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Make the PHI node use the select for all incoming values for "then" and // "if" blocks. - for (BasicBlock::iterator I = BB2->begin(); + for (BasicBlock::iterator I = EndBB->begin(); PHINode *PN = dyn_cast(I); ++I) { - unsigned BB1I = PN->getBasicBlockIndex(BB1); - unsigned BIParentI = PN->getBasicBlockIndex(BIParent); - Value *BB1V = PN->getIncomingValue(BB1I); - Value *BIParentV = PN->getIncomingValue(BIParentI); - if (TrueV == BB1V && FalseV == BIParentV) { - PN->setIncomingValue(BB1I, SI); - PN->setIncomingValue(BIParentI, SI); + unsigned ThenI = PN->getBasicBlockIndex(ThenBB); + unsigned OrigI = PN->getBasicBlockIndex(BB); + Value *ThenV = PN->getIncomingValue(ThenI); + Value *OrigV = PN->getIncomingValue(OrigI); + if (TrueV == ThenV && FalseV == OrigV) { + PN->setIncomingValue(ThenI, SI); + PN->setIncomingValue(OrigI, SI); } } } -- cgit v1.1 From 63f0846f1eb43332a08811d332b813276b727eb6 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Thu, 24 Jan 2013 10:35:40 +0000 Subject: [asan] adaptive redzones for globals (the larger the global the larger is the redzone) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173335 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f4715f5..477cb1a 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -875,12 +875,22 @@ bool AddressSanitizerModule::runOnModule(Module &M) { Value *FirstDynamic = 0, *LastDynamic = 0; for (size_t i = 0; i < n; i++) { + static const size_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast(G->getType()); Type *Ty = PtrTy->getElementType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); - size_t RZ = RedzoneSize(); - uint64_t RightRedzoneSize = RZ + (RZ - (SizeInBytes % RZ)); + size_t MinRZ = RedzoneSize(); + // MinRZ <= RZ <= kMaxGlobalRedzone + // and trying to make RZ to be ~ 1/4 of SizeInBytes. 
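+    // For example, with MinRZ == 32 a 4096-byte global gets
+    // RZ == (4096/32/4)*32 == 1024, while a 16-byte global gets the minimum
+    // RZ == 32; the rounding below then keeps size + redzone MinRZ-aligned.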
+ size_t RZ = std::max(MinRZ, + std::min(kMaxGlobalRedzone, + (SizeInBytes / MinRZ / 4) * MinRZ)); + uint64_t RightRedzoneSize = RZ; + // Round up to MinRZ + if (SizeInBytes % MinRZ) + RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); + assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); // Determine whether this global should be poisoned in initialization. bool GlobalHasDynamicInitializer = @@ -904,7 +914,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { M, NewTy, G->isConstant(), G->getLinkage(), NewInitializer, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); - NewGlobal->setAlignment(RZ); + NewGlobal->setAlignment(MinRZ); Value *Indices2[2]; Indices2[0] = IRB.getInt32(0); -- cgit v1.1 From 0afa33115c5f0bce263ef370886f53bc845ab7c1 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Jan 2013 10:40:51 +0000 Subject: Simplify the PHI node operand rewriting. Previously, the code would scan the PHI nodes and build up a small setvector of candidate value pairs in phi nodes to go and rewrite. Once certain the rewrite could be performed, the code walks the set, and for each one re-scans the entire PHI node list looking for nodes to rewrite operands. Instead, scan the PHI nodes once to check for hazards, and then scan it a second time to rewrite the operands to selects. No set vector, and a max of two scans. The only downside is that we might form identical selects, but instcombine or anything else should fold those easily, and it seems unlikely to happen often. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173337 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 77 ++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 29e1180..5492b60 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1424,8 +1424,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { } } - // Collect interesting PHIs, and scan for hazards. - SmallSetVector, 4> PHIs; + // Check that the PHI nodes can be converted to selects. + bool HaveRewritablePHIs = false; for (BasicBlock::iterator I = EndBB->begin(); PHINode *PN = dyn_cast(I); ++I) { Value *OrigV = PN->getIncomingValueForBlock(BB); @@ -1435,26 +1435,27 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { if (ThenV == OrigV) continue; + HaveRewritablePHIs = true; + // Check for safety. - if (ConstantExpr *CE = dyn_cast(ThenV)) { - // An unfolded ConstantExpr could end up getting expanded into - // Instructions. Don't speculate this and another instruction at - // the same time. - if (HInst) - return false; - if (!isSafeToSpeculativelyExecute(CE)) - return false; - if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) - return false; - } + ConstantExpr *CE = dyn_cast(ThenV); + if (!CE) + continue; // Known safe. - // Ok, we may insert a select for this PHI. - PHIs.insert(std::make_pair(ThenV, OrigV)); + // An unfolded ConstantExpr could end up getting expanded into + // Instructions. Don't speculate this and another instruction at + // the same time. + if (HInst) + return false; + if (!isSafeToSpeculativelyExecute(CE)) + return false; + if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + return false; } // If there are no PHIs to process, bail early. 
This helps ensure idempotence // as well. - if (PHIs.empty()) + if (!HaveRewritablePHIs) return false; // If we get here, we can hoist the instruction and if-convert. @@ -1466,35 +1467,27 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Insert selects and rewrite the PHI operands. IRBuilder Builder(BI); - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - Value *TrueV = PHIs[i].first; - Value *FalseV = PHIs[i].second; + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast(I); ++I) { + unsigned OrigI = PN->getBasicBlockIndex(BB); + unsigned ThenI = PN->getBasicBlockIndex(ThenBB); + Value *OrigV = PN->getIncomingValue(OrigI); + Value *ThenV = PN->getIncomingValue(ThenI); + + // Skip PHIs which are trivial. + if (OrigV == ThenV) + continue; // Create a select whose true value is the speculatively executed value and - // false value is the previously determined FalseV. - SelectInst *SI; + // false value is the preexisting value. Swap them if the branch + // destinations were inverted. + Value *TrueV = ThenV, *FalseV = OrigV; if (Invert) - SI = cast - (Builder.CreateSelect(BrCond, FalseV, TrueV, - FalseV->getName() + "." + TrueV->getName())); - else - SI = cast - (Builder.CreateSelect(BrCond, TrueV, FalseV, - TrueV->getName() + "." + FalseV->getName())); - - // Make the PHI node use the select for all incoming values for "then" and - // "if" blocks. - for (BasicBlock::iterator I = EndBB->begin(); - PHINode *PN = dyn_cast(I); ++I) { - unsigned ThenI = PN->getBasicBlockIndex(ThenBB); - unsigned OrigI = PN->getBasicBlockIndex(BB); - Value *ThenV = PN->getIncomingValue(ThenI); - Value *OrigV = PN->getIncomingValue(OrigI); - if (TrueV == ThenV && FalseV == OrigV) { - PN->setIncomingValue(ThenI, SI); - PN->setIncomingValue(OrigI, SI); - } - } + std::swap(TrueV, FalseV); + Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, + TrueV->getName() + "." + FalseV->getName()); + PN->setIncomingValue(OrigI, V); + PN->setIncomingValue(ThenI, V); } ++NumSpeculations; -- cgit v1.1 From 29f975f8ffda1f5d78cbf2530c2316abef11aa70 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Thu, 24 Jan 2013 10:43:50 +0000 Subject: [asan] fix 32-bit builds git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 477cb1a..0474eb5 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -875,15 +875,15 @@ bool AddressSanitizerModule::runOnModule(Module &M) { Value *FirstDynamic = 0, *LastDynamic = 0; for (size_t i = 0; i < n; i++) { - static const size_t kMaxGlobalRedzone = 1 << 18; + static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast(G->getType()); Type *Ty = PtrTy->getElementType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); - size_t MinRZ = RedzoneSize(); + uint64_t MinRZ = RedzoneSize(); // MinRZ <= RZ <= kMaxGlobalRedzone // and trying to make RZ to be ~ 1/4 of SizeInBytes. 
- size_t RZ = std::max(MinRZ,
+ uint64_t RZ = std::max(MinRZ,
 std::min(kMaxGlobalRedzone,
 (SizeInBytes / MinRZ / 4) * MinRZ));
 uint64_t RightRedzoneSize = RZ;
-- cgit v1.1

From 2c107a80206056cdc8c2c7cb715ff9e1db64add9 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Thu, 24 Jan 2013 11:52:58 +0000
Subject: Rephrase the speculating scan of the conditional BB to be phrased in
 terms of cost rather than hoisting a single instruction.

This does *not* change the cost model! We still set the cost threshold at
1 here, it's just that we track it by accumulating cost rather than by
storing an instruction.

The primary advantage is that we no longer leave no-op intrinsics in the
basic block. For example, this will now move both debug info intrinsics
and a single instruction, instead of only moving the instruction and
leaving a basic block with nothing but debug info intrinsics in it, with
those intrinsics no longer ordered correctly relative to the hoisted
value. Instead, we now splice the entire conditional basic block's
instruction sequence.

This also places the code for checking the safety of hoisting next to
the code computing the cost.

Currently, the only observable side-effect of this change is that debug
info intrinsics are no longer abandoned. I'm not sure how to craft a
test case for this, and my real goal was the refactoring, but I'll talk
to Dave or Eric about how to add a test case for this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173339 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)
 (limited to 'lib')

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 5492b60..0e38287 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1387,34 +1387,31 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
 }
 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

- // Only speculatively execution a single instruction (not counting the
- // terminator) for now.
- Instruction *HInst = NULL;
- Instruction *Term = ThenBB->getTerminator();
- for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = ThenBB->end();
+ unsigned SpeculationCost = 0;
+ for (BasicBlock::iterator BBI = ThenBB->begin(),
+ BBE = llvm::prior(ThenBB->end());
 BBI != BBE; ++BBI) {
 Instruction *I = BBI;
 // Skip debug info.
- if (isa(I)) continue;
- if (I == Term) break;
+ if (isa(I))
+ continue;

- if (HInst)
+ // Only speculatively execution a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
 return false;
- HInst = I;
- }

- // Check the instruction to be hoisted, if there is one.
- if (HInst) {
 // Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(HInst))
+ if (!isSafeToSpeculativelyExecute(I))
 return false;
- if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold)
+ if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
 return false;

 // Do not hoist the instruction if any of its operands are defined but not
 // used in this BB. The transformation will prevent the operand from
 // being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ for (User::op_iterator i = I->op_begin(), e = I->op_end();
 i != e; ++i) {
 Instruction *OpI = dyn_cast(*i);
 if (OpI && OpI->getParent() == BB &&
@@ -1445,7 +1442,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
 // An unfolded ConstantExpr could end up getting expanded into
 // Instructions. Don't speculate this and another instruction at
 // the same time.
- if (HInst)
+ if (SpeculationCost > 0)
 return false;
 if (!isSafeToSpeculativelyExecute(CE))
 return false;
@@ -1461,9 +1458,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
 // If we get here, we can hoist the instruction and if-convert.
 DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

- // Hoist the instruction.
- if (HInst)
- BB->getInstList().splice(BI, ThenBB->getInstList(), HInst);
+ // Hoist the instructions.
+ BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
+ llvm::prior(ThenBB->end()));

 // Insert selects and rewrite the PHI operands.
 IRBuilder Builder(BI);
-- cgit v1.1

From 681add7a63b44249fd0fd38e63f10f18d6e99e38 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Thu, 24 Jan 2013 11:53:01 +0000
Subject: Switch the constant expression speculation cost evaluation away from
 a cost function that seems both a bit ad-hoc and also poorly suited to
 evaluating constant expressions.

Notably, it is missing any support for trivial expressions such as
'inttoptr'. I could fix this routine, but it isn't clear to me all of the
constraints its other users are operating under.

The core protection that seems relevant here is avoiding the formation of
a select instruction with a further chain of select operations in a
constant expression operand. Just explicitly encode that constraint.

Also, update the comments and organization here to make it clear where
this needs to go -- this should be driven off of real cost measurements
which take into account the number of constant expressions and the depth
of the constant expression tree.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173340 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
 (limited to 'lib')

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0e38287..7ec3165 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1433,21 +1433,28 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
 continue;

 HaveRewritablePHIs = true;
-
- // Check for safety.
 ConstantExpr *CE = dyn_cast(ThenV);
 if (!CE)
- continue; // Known safe.
+ continue; // Known safe and cheap.
+
+ if (!isSafeToSpeculativelyExecute(CE))
+ return false;
+
+ // Don't speculate into a select with a constant select expression operand.
+ // FIXME: This should really be a cost metric, but our cost model doesn't
+ // accurately model the expense of select.
+ if (Operator::getOpcode(CE) == Instruction::Select)
+ return false;

 // An unfolded ConstantExpr could end up getting expanded into
 // Instructions. Don't speculate this and another instruction at
 // the same time.
+ // FIXME: This is strange because provided we haven't already hit the cost
+ // of 1, this code will speculate an arbitrary number of complex constant
+ // expression PHI nodes. Also, this doesn't account for how complex the
+ // constant expression is.
if (SpeculationCost > 0) return false; - if (!isSafeToSpeculativelyExecute(CE)) - return false; - if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) - return false; } // If there are no PHIs to process, bail early. This helps ensure idempotence -- cgit v1.1 From 47d8f6dca5a64f642a82d24b9e3cf882b56c5c3e Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Jan 2013 12:05:17 +0000 Subject: Address a large chunk of this FIXME by accumulating the cost for unfolded constant expressions rather than checking each one independently. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 7ec3165..11cb25d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1446,14 +1446,12 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { if (Operator::getOpcode(CE) == Instruction::Select) return false; - // An unfolded ConstantExpr could end up getting expanded into - // Instructions. Don't speculate this and another instruction at - // the same time. - // FIXME: This is strange because provided we haven't already hit the cost - // of 1, this code will speculate an arbitrary number of complex constant - // expression PHI nodes. Also, this doesn't account for how complex the - // constant expression is. - if (SpeculationCost > 0) + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculationCost; + if (SpeculationCost > 1) return false; } -- cgit v1.1 From 1f255419d424c85bc5dbb77f530561d46e5197cc Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Jan 2013 12:39:29 +0000 Subject: Plug TTI into the speculation logic, giving it a real cost interface that can be specialized by targets. The goal here is not to be more aggressive, but to just be more accurate with very obvious cases. There are instructions which are known to be truly free and which were not being modeled as such in this code -- see the regression test which is distilled from an inner loop of zlib. Everywhere the TTI cost model is insufficiently conservative I've added explicit checks with FIXME comments to go add proper modelling of these cost factors. If this causes regressions, the likely solution is to make TTI even more conservative in its cost estimates, but test cases will help here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 11cb25d..9f3464d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1369,7 +1369,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. 
Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -1398,15 +1399,22 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Only speculatively execution a single instruction (not counting the // terminator) for now. - ++SpeculationCost; - if (SpeculationCost > 1) + SpeculationCost += TTI.getUserCost(I); + if (SpeculationCost > TargetTransformInfo::TCC_Basic) return false; // Don't hoist the instruction if it's unsafe or expensive. if (!isSafeToSpeculativelyExecute(I)) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + // FIXME: This should really be a cost metric, but our cost model doesn't + // accurately model the expense of select. + if (isa(I)) return false; + // FIXME: The cost metric currently doesn't reason accurately about simple + // versus complex GEPs, take a conservative approach here. + if (GEPOperator *GEP = dyn_cast(I)) + if (!GEP->hasAllConstantIndices()) + return false; // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from @@ -1449,9 +1457,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Account for the cost of an unfolded ConstantExpr which could end up // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the - // constant expression. - ++SpeculationCost; - if (SpeculationCost > 1) + // constant expression. The cost functions in TTI don't yet correctly model + // constant expression costs. + SpeculationCost += TargetTransformInfo::TCC_Basic; + if (SpeculationCost > TargetTransformInfo::TCC_Basic) return false; } @@ -3868,7 +3877,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) return SimplifyCFG(BB, TTI, TD) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { @@ -3877,7 +3886,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) return SimplifyCFG(BB, TTI, TD) | true; } -- cgit v1.1 From eacef325c6412820a377fe4f853eb9c7c23db9ee Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 24 Jan 2013 13:24:24 +0000 Subject: Revert r173342 temporarily. It appears to cause a very late miscompile of stage2 in a bootstrap. Still investigating.... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173343 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 9f3464d..11cb25d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1369,8 +1369,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { /// \endcode /// /// \returns true if the conditional block is removed. 
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const TargetTransformInfo &TTI) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -1399,22 +1398,15 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Only speculatively execution a single instruction (not counting the // terminator) for now. - SpeculationCost += TTI.getUserCost(I); - if (SpeculationCost > TargetTransformInfo::TCC_Basic) + ++SpeculationCost; + if (SpeculationCost > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. if (!isSafeToSpeculativelyExecute(I)) return false; - // FIXME: This should really be a cost metric, but our cost model doesn't - // accurately model the expense of select. - if (isa(I)) + if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; - // FIXME: The cost metric currently doesn't reason accurately about simple - // versus complex GEPs, take a conservative approach here. - if (GEPOperator *GEP = dyn_cast(I)) - if (!GEP->hasAllConstantIndices()) - return false; // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from @@ -1457,10 +1449,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Account for the cost of an unfolded ConstantExpr which could end up // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the - // constant expression. The cost functions in TTI don't yet correctly model - // constant expression costs. - SpeculationCost += TargetTransformInfo::TCC_Basic; - if (SpeculationCost > TargetTransformInfo::TCC_Basic) + // constant expression. + ++SpeculationCost; + if (SpeculationCost > 1) return false; } @@ -3877,7 +3868,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) return SimplifyCFG(BB, TTI, TD) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { @@ -3886,7 +3877,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB, TTI, TD) | true; } -- cgit v1.1 From e5742464895b7f1fcc6a5b968b72f6ec66a1fd44 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 24 Jan 2013 16:28:28 +0000 Subject: ConstantFolding: Add a missing folding that leads to a miscompile. We use constant folding to see if an intrinsic evaluates to the same value as a constant that we know. If we don't take the undefinedness into account we get a value that doesn't match the actual implementation, and miscompiled code. This was uncovered by Chandler's simplifycfg changes. 
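For reference, the semantics being folded, as a standalone C++ sketch
(illustrative only, not LLVM code; __builtin_ctz is a GCC/Clang extension,
and the function names here are made up):

  #include <cstdint>

  // is_zero_undef == 0: a zero input is defined and yields the bit
  // width, so folding cttz(0, 0) to 32 is correct.
  static uint32_t cttz_zero_defined(uint32_t X) {
    return X == 0 ? 32u : static_cast<uint32_t>(__builtin_ctz(X));
  }

  // is_zero_undef == 1: a zero input is undefined, so the folder must
  // produce undef for cttz(0, 1) rather than the bit width.
  static uint32_t cttz_zero_undef(uint32_t X) {
    return static_cast<uint32_t>(__builtin_ctz(X)); // UB when X == 0
  }

The ctlz case is symmetric, using countLeadingZeros.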
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 9246e26..4b4fa5c 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1474,12 +1474,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, return ConstantStruct::get(cast(F->getReturnType()), Ops); } case Intrinsic::cttz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); case Intrinsic::ctlz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } -- cgit v1.1 From d5a80c7358d01cde9e166bebc8a3ffa0eca3aa54 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 24 Jan 2013 16:44:25 +0000 Subject: Reapply chandlerc's r173342 now that the miscompile it was triggering is fixed. Original commit message: Plug TTI into the speculation logic, giving it a real cost interface that can be specialized by targets. The goal here is not to be more aggressive, but to just be more accurate with very obvious cases. There are instructions which are known to be truly free and which were not being modeled as such in this code -- see the regression test which is distilled from an inner loop of zlib. Everywhere the TTI cost model is insufficiently conservative I've added explicit checks with FIXME comments to go add proper modelling of these cost factors. If this causes regressions, the likely solution is to make TTI even more conservative in its cost estimates, but test cases will help here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173357 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 11cb25d..9f3464d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1369,7 +1369,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -1398,15 +1399,22 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Only speculatively execution a single instruction (not counting the // terminator) for now. - ++SpeculationCost; - if (SpeculationCost > 1) + SpeculationCost += TTI.getUserCost(I); + if (SpeculationCost > TargetTransformInfo::TCC_Basic) return false; // Don't hoist the instruction if it's unsafe or expensive. 
if (!isSafeToSpeculativelyExecute(I)) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + // FIXME: This should really be a cost metric, but our cost model doesn't + // accurately model the expense of select. + if (isa(I)) return false; + // FIXME: The cost metric currently doesn't reason accurately about simple + // versus complex GEPs, take a conservative approach here. + if (GEPOperator *GEP = dyn_cast(I)) + if (!GEP->hasAllConstantIndices()) + return false; // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from @@ -1449,9 +1457,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Account for the cost of an unfolded ConstantExpr which could end up // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the - // constant expression. - ++SpeculationCost; - if (SpeculationCost > 1) + // constant expression. The cost functions in TTI don't yet correctly model + // constant expression costs. + SpeculationCost += TargetTransformInfo::TCC_Basic; + if (SpeculationCost > TargetTransformInfo::TCC_Basic) return false; } @@ -3868,7 +3877,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) return SimplifyCFG(BB, TTI, TD) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { @@ -3877,7 +3886,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) return SimplifyCFG(BB, TTI, TD) | true; } -- cgit v1.1 From 78e10573710a2f2623dfd5a2cc66855814b7371f Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 24 Jan 2013 20:43:18 +0000 Subject: Start cleanup of PPC register definitions using foreach loops. No functionality change intended. This captures the first two cases GPR32/64. For the others, we need an addition operator (if we have one, I've not yet found it). Based on a suggestion made by Tom Stellard in the AArch64 review! 
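The shape of the cleanup, mimicked in standalone C++ purely as an
illustration (the real change below uses TableGen's foreach; the printed
strings mirror the .td entries being replaced):

  #include <cstdio>

  // Emit the same 32-entry table that was previously written out by
  // hand, the way the foreach below collapses def R0 .. def R31.
  int main() {
    for (int Index = 0; Index < 32; ++Index)
      std::printf("def R%d : GPR<%d, \"r%d\">, DwarfRegNum<[-2, %d]>;\n",
                  Index, Index, Index, Index);
    return 0;
  }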
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173366 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCRegisterInfo.td | 72 ++++------------------------------- 1 file changed, 7 insertions(+), 65 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 5ca3876..44665a6 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -63,74 +63,16 @@ class CRBIT num, string n> : PPCReg { field bits<5> Num = num; } - // General-purpose registers -def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>; -def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>; -def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>; -def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>; -def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>; -def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>; -def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>; -def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>; -def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>; -def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>; -def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>; -def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>; -def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>; -def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>; -def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>; -def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>; -def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>; -def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>; -def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>; -def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>; -def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>; -def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>; -def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>; -def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>; -def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>; -def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>; -def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>; -def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>; -def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>; -def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>; -def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>; -def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>; +foreach Index = 0-31 in { + def R#Index : GPR, DwarfRegNum<[-2, Index]>; +} // 64-bit General-purpose registers -def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>; -def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>; -def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>; -def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>; -def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>; -def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>; -def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>; -def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>; -def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>; -def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>; -def X10 : GP8, DwarfRegNum<[10, -2]>; -def X11 : GP8, DwarfRegNum<[11, -2]>; -def X12 : GP8, DwarfRegNum<[12, -2]>; -def X13 : GP8, DwarfRegNum<[13, -2]>; -def X14 : GP8, DwarfRegNum<[14, -2]>; -def X15 : GP8, DwarfRegNum<[15, -2]>; -def X16 : GP8, DwarfRegNum<[16, -2]>; -def X17 : GP8, DwarfRegNum<[17, -2]>; -def X18 : GP8, DwarfRegNum<[18, -2]>; -def X19 : GP8, DwarfRegNum<[19, -2]>; -def X20 : GP8, DwarfRegNum<[20, -2]>; -def X21 : GP8, DwarfRegNum<[21, -2]>; -def X22 : GP8, DwarfRegNum<[22, -2]>; -def X23 : GP8, DwarfRegNum<[23, -2]>; -def X24 : GP8, DwarfRegNum<[24, -2]>; -def X25 : GP8, DwarfRegNum<[25, -2]>; -def X26 : GP8, DwarfRegNum<[26, -2]>; -def X27 : GP8, DwarfRegNum<[27, -2]>; -def X28 : GP8, DwarfRegNum<[28, -2]>; -def X29 : GP8, DwarfRegNum<[29, -2]>; -def X30 : GP8, DwarfRegNum<[30, 
-2]>; -def X31 : GP8, DwarfRegNum<[31, -2]>; +foreach Index = 0-31 in { + def X#Index : GP8("R"#Index), "r"#Index>, + DwarfRegNum<[Index, -2]>; +} // Floating-point registers def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>; -- cgit v1.1 From 7c24e61a2b9c372f0077b8887e7cca8dd49c933d Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 24 Jan 2013 21:35:00 +0000 Subject: Added comment to ObjCARC elaborating what is meant by the term 'Provenance' in 'Provenance Analysis'. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173374 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 45bc7ab..1c7acb0 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1222,6 +1222,12 @@ namespace { /// \brief This is similar to BasicAliasAnalysis, and it uses many of the same /// techniques, except it uses special ObjC-specific reasoning about pointer /// relationships. + /// + /// In this context ``Provenance'' is defined as the history of an object's + /// ownership. Thus ``Provenance Analysis'' is defined by using the notion of + /// an ``independent provenance source'' of a pointer to determine whether or + /// not two pointers have the same provenance source and thus could + /// potentially be related. class ProvenanceAnalysis { AliasAnalysis *AA; -- cgit v1.1 From b3755e7fa2e386e9bd348eda6b1876ae09c1bf99 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Thu, 24 Jan 2013 23:01:00 +0000 Subject: Moving Cost Tables up to share with other targets git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173382 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 59 ++++++------------------------- 1 file changed, 11 insertions(+), 48 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 675c896..f3dfa0e 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" using namespace llvm; // Declare the pass initialization routine locally as target-specific passes @@ -119,44 +120,6 @@ llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { // //===----------------------------------------------------------------------===// -namespace { -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; -} - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -namespace { -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; -} - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. 
- return -1; -} - X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -206,7 +169,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry AVX1CostTable[] = { + static const CostTblEntry AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. // Two ops + 1 extract + 1 insert = 4. @@ -220,7 +183,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { // Look for AVX1 lowering tricks. if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + int Idx = CostTableLookup(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, LT.second); if (Idx != -1) return LT.first * AVX1CostTable[Idx].Cost; @@ -254,7 +217,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + static const TypeConversionCostTblEntry AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -273,7 +236,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { }; if (ST->hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, + int Idx = ConvertCostTableLookup(AVXConversionTbl, array_lengthof(AVXConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) @@ -293,7 +256,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry SSE42CostTbl[] = { + static const CostTblEntry SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, @@ -302,7 +265,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i8, 1 }, }; - static const X86CostTblEntry AVX1CostTbl[] = { + static const CostTblEntry AVX1CostTbl[] = { { ISD::SETCC, MVT::v4f64, 1 }, { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. 
@@ -312,7 +275,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v32i8, 4 }, }; - static const X86CostTblEntry AVX2CostTbl[] = { + static const CostTblEntry AVX2CostTbl[] = { { ISD::SETCC, MVT::v4i64, 1 }, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, @@ -320,19 +283,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, }; if (ST->hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + int Idx = CostTableLookup(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); if (Idx != -1) return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + int Idx = CostTableLookup(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); if (Idx != -1) return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + int Idx = CostTableLookup(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); if (Idx != -1) return LT.first * SSE42CostTbl[Idx].Cost; } -- cgit v1.1 From dd4fc446b52cf9e617846c77e2c46e827f1d533e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 24 Jan 2013 23:59:08 +0000 Subject: Avoid creating duplicate CFG edges in the IfConversion pass. Patch by Stefan Hepp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173395 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/IfConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 3583a9b..9958d7d 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1557,7 +1557,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (Succ == FallThrough) continue; FromBBI.BB->removeSuccessor(Succ); - if (AddEdges) + if (AddEdges && !ToBBI.BB->isSuccessor(Succ)) ToBBI.BB->addSuccessor(Succ); } -- cgit v1.1 From b12a77199245a72c24dadbc039ed263d68d8e91a Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Jan 2013 00:12:57 +0000 Subject: SchedDFS: Refactor and tweak the subtree selection criteria. For sanity, create a root when NumDataSuccs >= 4. Splitting large subtrees will no longer be detrimental after my next checkin to handle nested tree. A magic number of 4 is fine because single subtrees seldom rejoin more than this. It makes subtrees easier to visualize and heuristics more sane. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAGInstrs.cpp | 56 ++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 411c46b..3960c57 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1039,37 +1039,17 @@ public: void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) { R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum; - // Join the child to its parent if they are connected via data dependence - // and do not exceed the limit. 
- if (!Parent || PredDep->getKind() != SDep::Data) + if (!Parent) return; + assert(PredDep && "PredDep required for non-root node"); - unsigned PredCnt = R.DFSData[SU->NodeNum].InstrCount; - if (PredCnt > R.SubtreeLimit) - return; - - R.DFSData[SU->NodeNum].SubtreeID = Parent->NodeNum; - - // Add the recently finished predecessor's bottom-up descendent count. - R.DFSData[Parent->NodeNum].InstrCount += PredCnt; - SubtreeClasses.join(Parent->NodeNum, SU->NodeNum); + joinPredSubtree(*PredDep, Parent); } /// Determine whether the DFS cross edge should be considered a subtree edge /// or a connection between subtrees. void visitCross(const SDep &PredDep, const SUnit *Succ) { - if (PredDep.getKind() == SDep::Data) { - // If this is a cross edge to a root, join the subtrees. This happens when - // the root was first reached by a non-data dependence. - unsigned NodeNum = PredDep.getSUnit()->NodeNum; - unsigned PredCnt = R.DFSData[NodeNum].InstrCount; - if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) { - R.DFSData[NodeNum].SubtreeID = Succ->NodeNum; - R.DFSData[Succ->NodeNum].InstrCount += PredCnt; - SubtreeClasses.join(Succ->NodeNum, NodeNum); - return; - } - } + joinPredSubtree(PredDep, Succ); ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); } @@ -1099,6 +1079,34 @@ public: } protected: + void joinPredSubtree(const SDep &PredDep, const SUnit *Succ) { + // Join the child to its parent if they are connected via data dependence. + if (PredDep.getKind() != SDep::Data) + return; + + // Four is the magic number of successors before a node is considered a + // pinch point. + unsigned NumDataSucs = 0; + const SUnit *PredSU = PredDep.getSUnit(); + for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), + SE = PredSU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data) { + if (++NumDataSucs >= 4) + return; + } + } + // If this is a cross edge to a root, join the subtrees. This happens when + // the root was first reached by a non-data dependence. + unsigned NodeNum = PredSU->NodeNum; + unsigned PredCnt = R.DFSData[NodeNum].InstrCount; + if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) { + R.DFSData[NodeNum].SubtreeID = Succ->NodeNum; + R.DFSData[Succ->NodeNum].InstrCount += PredCnt; + SubtreeClasses.join(Succ->NodeNum, NodeNum); + return; + } + } + /// Called by finalize() to record a connection between trees. void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { if (!Depth) -- cgit v1.1 From d2047c60013a1a48cc3cef88003633463285b9ee Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 25 Jan 2013 00:20:39 +0000 Subject: [mips] Set flag neverHasSideEffects flag on some of the floating point instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173401 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFPU.td | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index a38ed16..70465d8 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -107,7 +107,8 @@ multiclass ADDS_M : InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"), - [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>; + [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>, + NeverHasSideEffects; multiclass ABSS_M { @@ -213,15 +214,13 @@ def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>; def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>; def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>; def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>; -def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>, - NeverHasSideEffects; +def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>; defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>; defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>; defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>; defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>; -defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>, - NeverHasSideEffects; +defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>; let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>; @@ -238,19 +237,16 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { } def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>; -def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>, - NeverHasSideEffects; -def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>, - NeverHasSideEffects; +def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>; +def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>; -let Predicates = [NotFP64bit, HasStdEnc], neverHasSideEffects = 1 in { +let Predicates = [NotFP64bit, HasStdEnc] in { def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>; def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>; } -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64", - neverHasSideEffects = 1 in { +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>; def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>; def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; -- cgit v1.1 From 801c5838830d190a6b0d8e462bd43805f66ba50f Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Fri, 25 Jan 2013 01:31:34 +0000 Subject: This patch implements parsing the .word directive for the Mips assembler. 
Contributor: Vladimir Medic

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173407 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 49 ++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 7 deletions(-)
 (limited to 'lib')

diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 39a53ae..1f143d1 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -133,6 +133,8 @@ class MipsAsmParser : public MCTargetAsmParser {
 bool parseSetReorderDirective();
 bool parseSetNoReorderDirective();

+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+
 MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);

 bool isMips64() const {
@@ -1451,51 +1453,84 @@ bool MipsAsmParser::parseDirectiveSet() {
 Parser.EatToEndOfStatement();
 return false;
 }
+ return true;
 }

+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
 bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
- if (DirectiveID.getString() == ".ent") {
+ StringRef IDVal = DirectiveID.getString();
+
+ if ( IDVal == ".ent") {
 // ignore this directive for now
 Parser.Lex();
 return false;
 }

- if (DirectiveID.getString() == ".end") {
+ if (IDVal == ".end") {
 // ignore this directive for now
 Parser.Lex();
 return false;
 }

- if (DirectiveID.getString() == ".frame") {
+ if (IDVal == ".frame") {
 // ignore this directive for now
 Parser.EatToEndOfStatement();
 return false;
 }

- if (DirectiveID.getString() == ".set") {
+ if (IDVal == ".set") {
 return parseDirectiveSet();
 }

- if (DirectiveID.getString() == ".fmask") {
+ if (IDVal == ".fmask") {
 // ignore this directive for now
 Parser.EatToEndOfStatement();
 return false;
 }

- if (DirectiveID.getString() == ".mask") {
+ if (IDVal == ".mask") {
 // ignore this directive for now
 Parser.EatToEndOfStatement();
 return false;
 }

- if (DirectiveID.getString() == ".gpword") {
+ if (IDVal == ".gpword") {
 // ignore this directive for now
 Parser.EatToEndOfStatement();
 return false;
 }

+ if (IDVal == ".word") {
+ parseDirectiveWord(4, DirectiveID.getLoc());
+ return false;
+ }
+
 return true;
 }
-- cgit v1.1

From 178f7d08a41f2e9432b96cd27f0c8ea42fa0ac9e Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 25 Jan 2013 04:01:04 +0000
Subject: MISched: Add SchedDFSResult to ScheduleDAGMI to formalize the
 interface and allow other strategies to select it.
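A rough fragment of what selecting the analysis looks like from a custom
strategy (the hooks named here are the ones this patch adds; the
surrounding strategy object, its DAG member, and SU are assumed context,
not shown):

  // In initialize(): DAG = dag; DAG->initDFSResult();
  // In registerRoots(): DAG->computeDFSResult(Roots);
  const SchedDFSResult *DFS = DAG->getDFSResult();
  unsigned TreeID = DFS->getSubtreeID(SU);        // SU's subtree
  unsigned Level = DFS->getSubtreeLevel(TreeID);  // its connection depth
  // The DAG also tracks which subtrees have started issuing:
  bool InFlight = DAG->getScheduledTrees().test(TreeID);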
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173413 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 80 +++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index b9198e8..3e5935c 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -56,6 +56,9 @@ static cl::opt EnableLoadCluster("misched-cluster", cl::Hidden, static cl::opt EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); +// DAG subtrees must have at least this many nodes. +static const unsigned MinSubtreeSize = 8; + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -301,6 +304,12 @@ void ReadyQueue::dump() { // preservation. //===----------------------------------------------------------------------===// +ScheduleDAGMI::~ScheduleDAGMI() { + delete DFSResult; + DeleteContainerPointers(Mutations); + delete SchedImpl; +} + bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { if (SuccSU != &ExitSU) { // Do not use WillCreateCycle, it assumes SD scheduling. @@ -504,8 +513,6 @@ void ScheduleDAGMI::schedule() { DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - if (ViewMISchedDAGs) viewGraph(); - initQueues(); bool IsTopNode = false; @@ -554,6 +561,19 @@ void ScheduleDAGMI::postprocessDAG() { } } +void ScheduleDAGMI::initDFSResult() { + if (!DFSResult) + DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); + DFSResult->clear(); + DFSResult->resize(SUnits.size()); + ScheduledTrees.clear(); +} + +void ScheduleDAGMI::computeDFSResult(ArrayRef Roots) { + DFSResult->compute(Roots); + ScheduledTrees.resize(DFSResult->getNumSubtrees()); +} + // Release all DAG roots for scheduling. // // Nodes with unreleased weak edges can still be roots. @@ -655,6 +675,15 @@ void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { SU->isScheduled = true; + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); } @@ -1187,6 +1216,8 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); + DAG->initDFSResult(); + // Initialize resource counts. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or @@ -1247,6 +1278,8 @@ void ConvergingScheduler::registerRoots() { Rem.CriticalPath = (*I)->getDepth(); } DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + DAG->computeDFSResult(Bot.Available.elements()); } /// Does this SU have a hazard within the current instruction group. @@ -2056,12 +2089,11 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.", namespace { /// \brief Order nodes by the ILP metric. 
struct ILPOrder { - SchedDFSResult *DFSResult; - BitVector *ScheduledTrees; + const SchedDFSResult *DFSResult; + const BitVector *ScheduledTrees; bool MaximizeILP; - ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP) - : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {} + ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} /// \brief Apply a less-than relation on node priority. /// @@ -2099,26 +2131,23 @@ class ILPScheduler : public MachineSchedStrategy { /// (a motivating test case must be found). static const unsigned SubtreeLimit = 16; - SchedDFSResult DFSResult; - BitVector ScheduledTrees; + ScheduleDAGMI *DAG; ILPOrder Cmp; std::vector ReadyQ; public: - ILPScheduler(bool MaximizeILP) - : DFSResult(/*BottomUp=*/true, SubtreeLimit), - Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {} + ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} - virtual void initialize(ScheduleDAGMI *DAG) { + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + DAG->initDFSResult(); + Cmp.DFSResult = DAG->getDFSResult(); + Cmp.ScheduledTrees = &DAG->getScheduledTrees(); ReadyQ.clear(); - DFSResult.clear(); - DFSResult.resize(DAG->SUnits.size()); - ScheduledTrees.clear(); } virtual void registerRoots() { - DFSResult.compute(ReadyQ); - ScheduledTrees.resize(DFSResult.getNumSubtrees()); + DAG->computeDFSResult(ReadyQ); // Restore the heap in ReadyQ with the updated DFS results. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -2135,21 +2164,22 @@ public: IsTopNode = false; DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " << *SU->getInstr() - << " ILP: " << DFSResult.getILP(SU) - << " Tree: " << DFSResult.getSubtreeID(SU) << " @" - << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<< '\n'); + << " ILP: " << DAG->getDFSResult()->getILP(SU) + << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" + << DAG->getDFSResult()->getSubtreeLevel( + DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); return SU; } + /// \brief Scheduler callback to notify that a new subtree is scheduled. + virtual void scheduleTree(unsigned SubtreeID) { + std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } + /// Callback after a node is scheduled. Mark a newly scheduled tree, notify /// DFSResults, and resort the priority Q. virtual void schedNode(SUnit *SU, bool IsTopNode) { assert(!IsTopNode && "SchedDFSResult needs bottom-up"); - if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) { - ScheduledTrees.set(DFSResult.getSubtreeID(SU)); - DFSResult.scheduleTree(DFSResult.getSubtreeID(SU)); - std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); - } } virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } -- cgit v1.1 From baf868b9b8d187744d183d57ef3cbb2a44ca047a Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 25 Jan 2013 05:40:09 +0000 Subject: Switch this code away from Value::isUsedInBasicBlock. That code either loops over instructions in the basic block or the use-def list of the value, neither of which are really efficient when repeatedly querying about values in the same basic block. What's more, we already know that the CondBB is small, and so we can do a much more efficient test by counting the uses in CondBB, and seeing if those account for all of the uses. Finally, we shouldn't blanket fail on any such instruction, instead we should conservatively assume that those instructions are part of the cost. 
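The replacement pattern, reduced to a standalone sketch (plain C++ with
invented names; in the patch itself the map is SinkCandidateUseCounts and
the total comes from getNumUses()):

  #include <map>
  #include <utility>
  #include <vector>

  // A use is a (value id, block id) pair; TotalUses maps each value id
  // to its total number of uses anywhere in the function.
  typedef std::pair<int, int> Use;

  // One pass over Uses counts how often each value is used inside
  // block B; all of a value's uses lie in B iff that count equals its
  // total use count. No repeated scans of the use-def lists.
  std::vector<int> valuesUsedOnlyIn(const std::vector<Use> &Uses,
                                    const std::map<int, int> &TotalUses,
                                    int B) {
    std::map<int, int> InB;
    for (std::vector<Use>::const_iterator I = Uses.begin(),
         E = Uses.end(); I != E; ++I)
      if (I->second == B)
        ++InB[I->first];

    std::vector<int> Result;
    for (std::map<int, int>::const_iterator I = InB.begin(),
         E = InB.end(); I != E; ++I) {
      std::map<int, int>::const_iterator T = TotalUses.find(I->first);
      if (T != TotalUses.end() && T->second == I->second)
        Result.push_back(I->first);
    }
    return Result;
  }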
Note that this actually fixes a bug in the pass because isUsedInBasicBlock has a really terrible bug in it. I'll fix that in my next commit, but the fix for it would make this code suddenly take the compile time hit I thought it already was taking, so I wanted to go ahead and migrate this code to a faster & better pattern. The bug in isUsedInBasicBlock was also causing other tests to test the wrong thing entirely: for example we weren't actually disabling speculation for floating point operations as intended (and tested), but the test passed because we failed to speculate them due to the isUsedInBasicBlock failure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173417 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 9f3464d..3cf3984 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1388,6 +1388,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, } assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); + // Keep a count of how many times instructions are used within CondBB when + // they are candidates for sinking into CondBB. Specifically: + // - They are defined in BB, and + // - They have no side effects, and + // - All of their uses are in CondBB. + SmallDenseMap SinkCandidateUseCounts; + unsigned SpeculationCost = 0; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = llvm::prior(ThenBB->end()); @@ -1406,9 +1413,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Don't hoist the instruction if it's unsafe or expensive. if (!isSafeToSpeculativelyExecute(I)) return false; - // FIXME: This should really be a cost metric, but our cost model doesn't - // accurately model the expense of select. - if (isa(I)) + // FIXME: These should really be cost metrics, but our cost model doesn't + // accurately model the expense of selects and floating point operations. + // FIXME: Is it really safe to speculate floating point operations? + // Signaling NaNs break with this, but we shouldn't care, right? + if (isa(I) || I->getType()->isFPOrFPVectorTy()) return false; // FIXME: The cost metric currently doesn't reason accurately about simple // versus complex GEPs, take a conservative approach here. @@ -1422,13 +1431,26 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast(*i); - if (OpI && OpI->getParent() == BB && - !OpI->mayHaveSideEffects() && - !OpI->isUsedInBasicBlock(BB)) - return false; + if (!OpI || OpI->getParent() != BB || + OpI->mayHaveSideEffects()) + continue; // Not a candidate for sinking. + + ++SinkCandidateUseCounts[OpI]; } } + // Consider any sink candidates which are only used in CondBB as costs for + // speculation. Note, while we iterate over a DenseMap here, we are summing + // and so iteration order isn't significant. + for (SmallDenseMap::iterator I = + SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end(); + I != E; ++I) + if (I->first->getNumUses() == I->second) { + SpeculationCost += TTI.getUserCost(I->first); + if (SpeculationCost > TargetTransformInfo::TCC_Basic) + return false; + } + // Check that the PHI nodes can be converted to selects. 
bool HaveRewritablePHIs = false; for (BasicBlock::iterator I = EndBB->begin(); -- cgit v1.1 From bfb8223e2b2a55c3ac6c73be0ac99bbce17cb097 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Jan 2013 06:02:44 +0000 Subject: SchedDFS: Initial support for nested subtrees. This is mostly refactoring, along with adding an instruction count within the subtrees and ensuring we only look at data edges. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173420 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAGInstrs.cpp | 110 +++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 3960c57..428c1a4 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1021,35 +1021,56 @@ class SchedDFSImpl { public: SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSData.size()) {} - /// SubtreID is initialized to zero, set to itself to flag the root of a - /// subtree, set to the parent to indicate an interior node, - /// then set to a representative subtree ID during finalization. + /// Return true if this node been visited by the DFS traversal. + /// + /// During visitPostorderNode the Node's SubtreeID is assigned to the Node + /// ID. Later, SubtreeID is updated but remains valid. bool isVisited(const SUnit *SU) const { - return R.DFSData[SU->NodeNum].SubtreeID; + return R.DFSData[SU->NodeNum].SubtreeID != SchedDFSResult::InvalidSubtreeID; } /// Initialize this node's instruction count. We don't need to flag the node /// visited until visitPostorder because the DAG cannot have cycles. void visitPreorder(const SUnit *SU) { R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ? 0 : 1; + R.DFSData[SU->NodeNum].SubInstrCount = R.DFSData[SU->NodeNum].InstrCount; } - /// Mark this node as either the root of a subtree or an interior - /// node. Increment the parent node's instruction count. - void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) { - R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum; + /// Called once for each tree edge after calling visitPostOrderNode on the + /// predecessor. Increment the parent node's instruction count and + /// preemptively join this subtree to its parent's if it is small enough. + void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { + R.DFSData[Succ->NodeNum].InstrCount + += R.DFSData[PredDep.getSUnit()->NodeNum].InstrCount; + joinPredSubtree(PredDep, Succ); + } - if (!Parent) - return; - assert(PredDep && "PredDep required for non-root node"); + /// Called once for each node after all predecessors are visited. Revisit this + /// node's predecessors and potentially join them now that we know the ILP of + /// the other predecessors. + void visitPostorderNode(const SUnit *SU) { + // Mark this node as the root of a subtree. It may be joined with its + // successors later. + R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum; - joinPredSubtree(*PredDep, Parent); + // If any predecessors are still in their own subtree, they either cannot be + // joined or are large enough to remain separate. If this parent node's + // total instruction count is not greater than a child subtree by at least + // the subtree limit, then try to join it now since splitting subtrees is + // only useful if multiple high-pressure paths are possible. 
+ unsigned InstrCount = R.DFSData[SU->NodeNum].InstrCount; + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->getKind() != SDep::Data) + continue; + unsigned PredNum = PI->getSUnit()->NodeNum; + if ((InstrCount - R.DFSData[PredNum].InstrCount) < R.SubtreeLimit) + joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + } } - /// Determine whether the DFS cross edge should be considered a subtree edge - /// or a connection between subtrees. - void visitCross(const SDep &PredDep, const SUnit *Succ) { - joinPredSubtree(PredDep, Succ); + /// Add a connection for cross edges. + void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); } @@ -1079,32 +1100,35 @@ public: } protected: - void joinPredSubtree(const SDep &PredDep, const SUnit *Succ) { - // Join the child to its parent if they are connected via data dependence. - if (PredDep.getKind() != SDep::Data) - return; + /// Join the predecessor subtree with the successor that is its DFS + /// parent. Apply some heuristics before joining. + bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, + bool CheckLimit = true) { + assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); + + // Check if the predecessor is already joined. + const SUnit *PredSU = PredDep.getSUnit(); + unsigned PredNum = PredSU->NodeNum; + if (R.DFSData[PredNum].SubtreeID != PredNum) + return false; // Four is the magic number of successors before a node is considered a // pinch point. unsigned NumDataSucs = 0; - const SUnit *PredSU = PredDep.getSUnit(); for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), SE = PredSU->Succs.end(); SI != SE; ++SI) { if (SI->getKind() == SDep::Data) { if (++NumDataSucs >= 4) - return; + return false; } } - // If this is a cross edge to a root, join the subtrees. This happens when - // the root was first reached by a non-data dependence. - unsigned NodeNum = PredSU->NodeNum; - unsigned PredCnt = R.DFSData[NodeNum].InstrCount; - if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) { - R.DFSData[NodeNum].SubtreeID = Succ->NodeNum; - R.DFSData[Succ->NodeNum].InstrCount += PredCnt; - SubtreeClasses.join(Succ->NodeNum, NodeNum); - return; - } + if (CheckLimit && R.DFSData[PredNum].SubInstrCount > R.SubtreeLimit) + return false; + + R.DFSData[PredNum].SubtreeID = Succ->NodeNum; + R.DFSData[Succ->NodeNum].SubInstrCount += R.DFSData[PredNum].SubInstrCount; + SubtreeClasses.join(Succ->NodeNum, PredNum); + return true; } /// Called by finalize() to record a connection between trees. @@ -1153,6 +1177,15 @@ public: }; } // anonymous +static bool hasDataSucc(const SUnit *SU) { + for (SUnit::const_succ_iterator + SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data) + return true; + } + return false; +} + /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first /// search from this root. void SchedDFSResult::compute(ArrayRef Roots) { @@ -1170,11 +1203,12 @@ void SchedDFSResult::compute(ArrayRef Roots) { while (DFS.getPred() != DFS.getPredEnd()) { const SDep &PredDep = *DFS.getPred(); DFS.advance(); - // If the pred is already valid, skip it. We may preorder visit a node - // with InstrCount==0 more than once, but it won't affect heuristics - // because we don't care about cross edges to leaf copies. + // Ignore non-data edges. 
+ if (PredDep.getKind() != SDep::Data) + continue; + // An already visited edge is a cross edge, assuming an acyclic DAG. if (Impl.isVisited(PredDep.getSUnit())) { - Impl.visitCross(PredDep, DFS.getCurr()); + Impl.visitCrossEdge(PredDep, DFS.getCurr()); continue; } Impl.visitPreorder(PredDep.getSUnit()); @@ -1183,7 +1217,9 @@ void SchedDFSResult::compute(ArrayRef Roots) { // Visit the top of the stack in postorder and backtrack. const SUnit *Child = DFS.getCurr(); const SDep *PredDep = DFS.backtrack(); - Impl.visitPostorder(Child, PredDep, PredDep ? DFS.getCurr() : 0); + Impl.visitPostorderNode(Child); + if (PredDep) + Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); if (DFS.isComplete()) break; } -- cgit v1.1 From 4e1fb1894048455d49d62543b3f83672b27b0000 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Jan 2013 06:33:57 +0000 Subject: MIsched: Improve the interface to SchedDFS analysis (subtrees). Allow the strategy to select SchedDFS. Allow the results of SchedDFS to affect initialization of the scheduler state. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173425 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 62 ++++++++++++++------------ lib/CodeGen/ScheduleDAGInstrs.cpp | 14 +++--- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 8 +++- 3 files changed, 49 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 3e5935c..aa59915 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -510,10 +510,19 @@ void ScheduleDAGMI::schedule() { postprocessDAG(); + SmallVector TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); + if (ViewMISchedDAGs) viewGraph(); - initQueues(); + // Initialize ready queues now that the DAG and priority data are finalized. + initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { @@ -561,25 +570,18 @@ void ScheduleDAGMI::postprocessDAG() { } } -void ScheduleDAGMI::initDFSResult() { +void ScheduleDAGMI::computeDFSResult() { if (!DFSResult) DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); DFSResult->clear(); - DFSResult->resize(SUnits.size()); ScheduledTrees.clear(); -} - -void ScheduleDAGMI::computeDFSResult(ArrayRef Roots) { - DFSResult->compute(Roots); + DFSResult->resize(SUnits.size()); + DFSResult->compute(SUnits); ScheduledTrees.resize(DFSResult->getNumSubtrees()); } -// Release all DAG roots for scheduling. -// -// Nodes with unreleased weak edges can still be roots. -void ScheduleDAGMI::releaseRoots() { - SmallVector BotRoots; - +void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl &TopRoots, + SmallVectorImpl &BotRoots) { for (std::vector::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { SUnit *SU = &(*I); @@ -589,28 +591,33 @@ void ScheduleDAGMI::releaseRoots() { // A SUnit is ready to top schedule if it has no predecessors. if (!I->NumPredsLeft && SU != &EntrySU) - SchedImpl->releaseTopNode(SU); + TopRoots.push_back(SU); // A SUnit is ready to bottom schedule if it has no successors. if (!I->NumSuccsLeft && SU != &ExitSU) BotRoots.push_back(SU); } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. 
This is more natural and slightly more efficient. - for (SmallVectorImpl::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) - SchedImpl->releaseBottomNode(*I); } /// Identify DAG roots and setup scheduler queues. -void ScheduleDAGMI::initQueues() { +void ScheduleDAGMI::initQueues(ArrayRef TopRoots, + ArrayRef BotRoots) { NextClusterSucc = NULL; NextClusterPred = NULL; - // Initialize the strategy before modifying the DAG. - SchedImpl->initialize(this); - // Release all DAG roots for scheduling, not including EntrySU/ExitSU. - releaseRoots(); + // + // Nodes with unreleased weak edges can still be roots. + // Release top roots in forward order. + for (SmallVectorImpl::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } releaseSuccessors(&EntrySU); releasePredecessors(&ExitSU); @@ -1216,7 +1223,7 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); - DAG->initDFSResult(); + DAG->computeDFSResult(); // Initialize resource counts. @@ -1278,8 +1285,6 @@ void ConvergingScheduler::registerRoots() { Rem.CriticalPath = (*I)->getDepth(); } DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); - - DAG->computeDFSResult(Bot.Available.elements()); } /// Does this SU have a hazard within the current instruction group. @@ -2140,14 +2145,13 @@ public: virtual void initialize(ScheduleDAGMI *dag) { DAG = dag; - DAG->initDFSResult(); + DAG->computeDFSResult(); Cmp.DFSResult = DAG->getDFSResult(); Cmp.ScheduledTrees = &DAG->getScheduledTrees(); ReadyQ.clear(); } virtual void registerRoots() { - DAG->computeDFSResult(ReadyQ); // Restore the heap in ReadyQ with the updated DFS results. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 428c1a4..7ee5207 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1188,16 +1188,20 @@ static bool hasDataSucc(const SUnit *SU) { /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first /// search from this root. -void SchedDFSResult::compute(ArrayRef Roots) { +void SchedDFSResult::compute(ArrayRef SUnits) { if (!IsBottomUp) llvm_unreachable("Top-down ILP metric is unimplemnted"); SchedDFSImpl Impl(*this); - for (ArrayRef::const_iterator - RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) { + for (ArrayRef::const_iterator + SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { + const SUnit *SU = &*SI; + if (Impl.isVisited(SU) || hasDataSucc(SU)) + continue; + SchedDAGReverseDFS DFS; - Impl.visitPreorder(*RootI); - DFS.follow(*RootI); + Impl.visitPreorder(SU); + DFS.follow(SU); for (;;) { // Traverse the leftmost path as far as possible. 
      while (DFS.getPred() != DFS.getPredEnd()) {
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index aef6830..36dfaa4 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -152,6 +152,12 @@ void VLIWMachineScheduler::schedule() {
   // Postprocess the DAG to add platform specific artificial dependencies.
   postprocessDAG();
 
+  SmallVector TopRoots, BotRoots;
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+
+  // Initialize the strategy before modifying the DAG.
+  SchedImpl->initialize(this);
+
   // To view Height/Depth correctly, they should be accessed at least once.
   DEBUG(unsigned maxH = 0;
         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
@@ -166,7 +172,7 @@ void VLIWMachineScheduler::schedule() {
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
         SUnits[su].dumpAll(this));
 
-  initQueues();
+  initQueues(TopRoots, BotRoots);
 
   bool IsTopNode = false;
   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
-- cgit v1.1


From 988d06b0e574d8e50b043fd74dbf91c1dc403542 Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 25 Jan 2013 06:52:27 +0000
Subject: SchedDFS: Complete support for nested subtrees.

Maintain separate per-node and per-tree book-keeping.
Track all instructions above a DAG node including nested subtrees.
Separately track instructions within a subtree.
Record subtree parents.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173426 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/ScheduleDAGInstrs.cpp | 107 ++++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 33 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 7ee5207..ef50406 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1018,31 +1018,39 @@ class SchedDFSImpl {
   /// List PredSU, SuccSU pairs that represent data edges between subtrees.
   std::vector > ConnectionPairs;
 
+  struct RootData {
+    unsigned NodeID;
+    unsigned ParentNodeID;  // Parent node (member of the parent subtree).
+    unsigned SubInstrCount; // Instr count in this tree only, not children.
+
+    RootData(unsigned id): NodeID(id),
+                           ParentNodeID(SchedDFSResult::InvalidSubtreeID),
+                           SubInstrCount(0) {}
+
+    unsigned getSparseSetIndex() const { return NodeID; }
+  };
+
+  SparseSet RootSet;
+
 public:
-  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSData.size()) {}
+  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+    RootSet.setUniverse(R.DFSNodeData.size());
+  }
 
   /// Return true if this node been visited by the DFS traversal.
   ///
   /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
   /// ID. Later, SubtreeID is updated but remains valid.
   bool isVisited(const SUnit *SU) const {
-    return R.DFSData[SU->NodeNum].SubtreeID != SchedDFSResult::InvalidSubtreeID;
+    return R.DFSNodeData[SU->NodeNum].SubtreeID
+      != SchedDFSResult::InvalidSubtreeID;
   }
 
   /// Initialize this node's instruction count. We don't need to flag the node
   /// visited until visitPostorder because the DAG cannot have cycles.
   void visitPreorder(const SUnit *SU) {
-    R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ? 0 : 1;
-    R.DFSData[SU->NodeNum].SubInstrCount = R.DFSData[SU->NodeNum].InstrCount;
-  }
-
-  /// Called once for each tree edge after calling visitPostOrderNode on the
-  /// predecessor.
Increment the parent node's instruction count and - /// preemptively join this subtree to its parent's if it is small enough. - void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { - R.DFSData[Succ->NodeNum].InstrCount - += R.DFSData[PredDep.getSUnit()->NodeNum].InstrCount; - joinPredSubtree(PredDep, Succ); + R.DFSNodeData[SU->NodeNum].InstrCount = + SU->getInstr()->isTransient() ? 0 : 1; } /// Called once for each node after all predecessors are visited. Revisit this @@ -1051,22 +1059,42 @@ public: void visitPostorderNode(const SUnit *SU) { // Mark this node as the root of a subtree. It may be joined with its // successors later. - R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum; + R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; + RootData RData(SU->NodeNum); + RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; // If any predecessors are still in their own subtree, they either cannot be // joined or are large enough to remain separate. If this parent node's // total instruction count is not greater than a child subtree by at least // the subtree limit, then try to join it now since splitting subtrees is // only useful if multiple high-pressure paths are possible. - unsigned InstrCount = R.DFSData[SU->NodeNum].InstrCount; + unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; for (SUnit::const_pred_iterator PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { if (PI->getKind() != SDep::Data) continue; unsigned PredNum = PI->getSUnit()->NodeNum; - if ((InstrCount - R.DFSData[PredNum].InstrCount) < R.SubtreeLimit) + if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + + // Either link or merge the TreeData entry from the child to the parent. + if (R.DFSNodeData[PredNum].SubtreeID == PredNum) + RootSet[PredNum].ParentNodeID = SU->NodeNum; + else { + RData.SubInstrCount += RootSet[PredNum].SubInstrCount; + RootSet.erase(PredNum); + } } + RootSet[SU->NodeNum] = RData; + } + + /// Called once for each tree edge after calling visitPostOrderNode on the + /// predecessor. Increment the parent node's instruction count and + /// preemptively join this subtree to its parent's if it is small enough. + void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { + R.DFSNodeData[Succ->NodeNum].InstrCount + += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; + joinPredSubtree(PredDep, Succ); } /// Add a connection for cross edges. @@ -1078,13 +1106,25 @@ public: /// between trees. 
void finalize() { SubtreeClasses.compress(); + R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); + assert(SubtreeClasses.getNumClasses() == RootSet.size() + && "number of roots should match trees"); + for (SparseSet::const_iterator + RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { + unsigned TreeID = SubtreeClasses[RI->NodeID]; + if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + assert(RI->SubInstrCount <= R.DFSNodeData[RI->NodeID].InstrCount && + "Bad SubInstrCount"); + } R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); - for (unsigned Idx = 0, End = R.DFSData.size(); Idx != End; ++Idx) { - R.DFSData[Idx].SubtreeID = SubtreeClasses[Idx]; + for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { + R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; DEBUG(dbgs() << " SU(" << Idx << ") in tree " - << R.DFSData[Idx].SubtreeID << '\n'); + << R.DFSNodeData[Idx].SubtreeID << '\n'); } for (std::vector >::const_iterator I = ConnectionPairs.begin(), E = ConnectionPairs.end(); @@ -1109,7 +1149,7 @@ protected: // Check if the predecessor is already joined. const SUnit *PredSU = PredDep.getSUnit(); unsigned PredNum = PredSU->NodeNum; - if (R.DFSData[PredNum].SubtreeID != PredNum) + if (R.DFSNodeData[PredNum].SubtreeID != PredNum) return false; // Four is the magic number of successors before a node is considered a @@ -1122,11 +1162,9 @@ protected: return false; } } - if (CheckLimit && R.DFSData[PredNum].SubInstrCount > R.SubtreeLimit) + if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) return false; - - R.DFSData[PredNum].SubtreeID = Succ->NodeNum; - R.DFSData[Succ->NodeNum].SubInstrCount += R.DFSData[PredNum].SubInstrCount; + R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; SubtreeClasses.join(Succ->NodeNum, PredNum); return true; } @@ -1136,16 +1174,19 @@ protected: if (!Depth) return; - SmallVectorImpl &Connections = - R.SubtreeConnections[FromTree]; - for (SmallVectorImpl::iterator - I = Connections.begin(), E = Connections.end(); I != E; ++I) { - if (I->TreeID == ToTree) { - I->Level = std::max(I->Level, Depth); - return; + do { + SmallVectorImpl &Connections = + R.SubtreeConnections[FromTree]; + for (SmallVectorImpl::iterator + I = Connections.begin(), E = Connections.end(); I != E; ++I) { + if (I->TreeID == ToTree) { + I->Level = std::max(I->Level, Depth); + return; + } } - } - Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + FromTree = R.DFSTreeData[FromTree].ParentTreeID; + } while (FromTree != SchedDFSResult::InvalidSubtreeID); } }; } // namespace llvm -- cgit v1.1 From a5a73ad15905c18843a8312bb3f20f5c501744de Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Jan 2013 06:52:30 +0000 Subject: ScheduleDAG: Added isBoundaryNode to conveniently detect a common corner case. This fixes DAG subtree analysis at the boundary. 
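[Editorial note] Taken together with the previous commit, the subtree-joining policy reduces to a small predicate. The following is an illustrative distillation with ad hoc names (mayJoinParent is not in the tree; the real code additionally records per-root data in a SparseSet and handles cross edges):

    // Whether a predecessor subtree may be absorbed into its DFS parent.
    static bool mayJoinParent(bool PredIsOwnRoot, unsigned NumDataSuccs,
                              unsigned PredInstrCount, unsigned SubtreeLimit) {
      if (!PredIsOwnRoot)
        return false; // Already merged into some other subtree.
      if (NumDataSuccs >= 4)
        return false; // A pinch point; keep it as a subtree boundary.
      return PredInstrCount <= SubtreeLimit; // Small enough to absorb.
    }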
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173427 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAGInstrs.cpp | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ef50406..f27b970 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1078,9 +1078,17 @@ public: joinPredSubtree(*PI, SU, /*CheckLimit=*/false); // Either link or merge the TreeData entry from the child to the parent. - if (R.DFSNodeData[PredNum].SubtreeID == PredNum) - RootSet[PredNum].ParentNodeID = SU->NodeNum; - else { + if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { + // If the predecessor's parent is invalid, this is a tree edge and the + // current node is the parent. + if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) + RootSet[PredNum].ParentNodeID = SU->NodeNum; + } + else if (RootSet.count(PredNum)) { + // The predecessor is not a root, but is still in the root set. This + // must be the new parent that it was just joined to. Note that + // RootSet[PredNum].ParentNodeID may either be invalid or may still be + // set to the original parent. RData.SubInstrCount += RootSet[PredNum].SubInstrCount; RootSet.erase(PredNum); } @@ -1115,8 +1123,10 @@ public: if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; - assert(RI->SubInstrCount <= R.DFSNodeData[RI->NodeID].InstrCount && - "Bad SubInstrCount"); + // Note that SubInstrCount may be greater than InstrCount if we joined + // subtrees across a cross edge. InstrCount will be attributed to the + // original parent, while SubInstrCount will be attributed to the joined + // parent. } R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); @@ -1221,7 +1231,7 @@ public: static bool hasDataSucc(const SUnit *SU) { for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { - if (SI->getKind() == SDep::Data) + if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) return true; } return false; @@ -1249,8 +1259,10 @@ void SchedDFSResult::compute(ArrayRef SUnits) { const SDep &PredDep = *DFS.getPred(); DFS.advance(); // Ignore non-data edges. - if (PredDep.getKind() != SDep::Data) + if (PredDep.getKind() != SDep::Data + || PredDep.getSUnit()->isBoundaryNode()) { continue; + } // An already visited edge is a cross edge, assuming an acyclic DAG. if (Impl.isVisited(PredDep.getSUnit())) { Impl.visitCrossEdge(PredDep, DFS.getCurr()); -- cgit v1.1 From c6ada8e5f38168f13830e448f2b9e2d8e3eac72b Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Jan 2013 07:45:25 +0000 Subject: ScheduleDAG: colorize the DOT graph and improve formatting. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173431 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 6 +++++- lib/CodeGen/ScheduleDAGInstrs.cpp | 2 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 4 ++++ lib/Support/GraphWriter.cpp | 11 +++++++++++ 4 files changed, 21 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 8f7c5fd..d8b5fd4 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1428,7 +1428,8 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, } } -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, + bool SkipOpers) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; const MachineRegisterInfo *MRI = 0; @@ -1465,6 +1466,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { else OS << "UNKNOWN"; + if (SkipOpers) + return; + // Print the rest of the operands. bool OmittedAnyCallClobbers = false; bool FirstOp = true; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f27b970..59e1ca1 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -994,7 +994,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << ""; else - SU->getInstr()->print(oss); + SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); return oss.str(); } diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 6c50913..8ddb3e8 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -41,6 +41,10 @@ namespace llvm { return true; } + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 669c238..bff182f 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -53,6 +53,17 @@ std::string llvm::DOT::EscapeString(const std::string &Label) { return Str; } +/// \brief Get a color string for this node number. Simply round-robin selects +/// from a reasonable number of colors. +StringRef llvm::DOT::getColorString(unsigned ColorNumber) { + static const int NumColors = 20; + static const char* Colors[NumColors] = { + "aaaaaa", "aa0000", "00aa00", "aa5500", "0055ff", "aa00aa", "00aaaa", + "555555", "ff5555", "55ff55", "ffff55", "5555ff", "ff55ff", "55ffff", + "ffaaaa", "aaffaa", "ffffaa", "aaaaff", "ffaaff", "aaffff"}; + return Colors[ColorNumber % NumColors]; +} + // Execute the graph viewer. Return true if successful. static bool LLVM_ATTRIBUTE_UNUSED ExecGraphViewer(const sys::Path &ExecPath, std::vector &args, -- cgit v1.1 From 3084979ff27f48487c7421536144c41a36cae997 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Jan 2013 07:45:29 +0000 Subject: MachineScheduler support for viewGraph. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173432 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 89 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index aa59915..9072dd4 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include @@ -557,7 +558,6 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { // Build the DAG, and compute current register pressure. buildSchedGraph(AA, &RPTracker); - if (ViewMISchedDAGs) viewGraph(); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -2294,3 +2294,90 @@ static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler); #endif // !NDEBUG + +//===----------------------------------------------------------------------===// +// GraphWriter support for ScheduleDAGMI. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace llvm { + +template<> struct GraphTraits< + ScheduleDAGMI*> : public GraphTraits {}; + +template<> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return false; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { + std::string Str; + raw_string_ostream SS(Str); + SS << "SU(" << SU->NodeNum << ')'; + return SS.str(); + } + static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); + } + + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + std::string Str("shape=Mrecord"); + const SchedDFSResult *DFS = + static_cast(Graph)->getDFSResult(); + if (DFS) { + Str += ",style=filled,fillcolor=\"#"; + Str += DOT::getColorString(DFS->getSubtreeID(N)); + Str += '"'; + } + return Str; + } +}; +} // namespace llvm +#endif // NDEBUG + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. 
+void ScheduleDAGMI::viewGraph() {
+  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
-- cgit v1.1


From c855423ff2a18e2168324ac5902cfe862cd4b54f Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 25 Jan 2013 07:45:31 +0000
Subject: MIsched: Print block name.

No functionality.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173433 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 9072dd4..821a4f2 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -263,7 +263,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     }
     DEBUG(dbgs() << "********** MI Scheduling **********\n");
     DEBUG(dbgs() << MF->getName()
-          << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
+          << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+          << "\n  From: " << *I << "    To: ";
           if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
           else dbgs() << "End";
           dbgs() << " Remaining: " << RemainingInstrs << "\n");
-- cgit v1.1


From a3bb665c6505ff78c29a37ca95dad1d297928791 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Fri, 25 Jan 2013 08:08:54 +0000
Subject: Use the AttributeSet query instead of the Attribute query.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173434 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Verifier.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index ee20f53..b263bbe 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -730,12 +730,12 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
     VerifyParameterAttrs(Attrs, Attr.Index, Ty, Attr.Index == 0, V);
 
-    if (Attr.Attrs.hasAttribute(Attribute::Nest)) {
+    if (Attrs.hasAttribute(Attr.Index, Attribute::Nest)) {
       Assert1(!SawNest, "More than one parameter has attribute nest!", V);
       SawNest = true;
     }
 
-    if (Attr.Attrs.hasAttribute(Attribute::StructRet))
+    if (Attrs.hasAttribute(Attr.Index, Attribute::StructRet))
       Assert1(Attr.Index == 1, "Attribute sret is not on first parameter!", V);
   }
-- cgit v1.1


From 4a9256f265a7fcccd1f04518b55fd751f3a920a8 Mon Sep 17 00:00:00 2001
From: Silviu Baranga
Date: Fri, 25 Jan 2013 10:39:49 +0000
Subject: Fixed the condition codes for the atomic64 min/umin code generation
 on ARM. If the subtraction of the higher 32-bit parts gives a 0 result, we
 need to do the store operation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173437 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5b3e31f..bdbf45c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -7123,7 +7123,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
                               /*NeedsCarry*/ true, /*IsCmpxchg*/false,
-                              /*IsMinMax*/ true, ARMCC::LE);
+                              /*IsMinMax*/ true, ARMCC::LT);
   case ARM::ATOMMAX6432:
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
@@ -7133,7 +7133,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return EmitAtomicBinary64(MI, BB, isThumb2 ?
ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LS); + /*IsMinMax*/ true, ARMCC::LO); case ARM::ATOMUMAX6432: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, -- cgit v1.1 From d23a41c153712b929bd84f5e713bda5db5d6e66d Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 25 Jan 2013 14:49:08 +0000 Subject: Add an addition operator to TableGen This adds an !add(a, b) operator to tablegen; this will be used to cleanup the PPC register definitions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/Record.cpp | 3 +++ lib/TableGen/TGLexer.cpp | 1 + lib/TableGen/TGLexer.h | 2 +- lib/TableGen/TGParser.cpp | 4 ++++ 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index b1d3a5b..fcee93a 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -935,6 +935,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { break; } + case ADD: case SHL: case SRA: case SRL: { @@ -945,6 +946,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { int64_t Result; switch (getOpcode()) { default: llvm_unreachable("Bad opcode!"); + case ADD: Result = LHSv + RHSv; break; case SHL: Result = LHSv << RHSv; break; case SRA: Result = LHSv >> RHSv; break; case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break; @@ -970,6 +972,7 @@ std::string BinOpInit::getAsString() const { std::string Result; switch (Opc) { case CONCAT: Result = "!con"; break; + case ADD: Result = "!add"; break; case SHL: Result = "!shl"; break; case SRA: Result = "!sra"; break; case SRL: Result = "!srl"; break; diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index d733f14..e75abcf 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -462,6 +462,7 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("head", tgtok::XHead) .Case("tail", tgtok::XTail) .Case("con", tgtok::XConcat) + .Case("add", tgtok::XADD) .Case("shl", tgtok::XSHL) .Case("sra", tgtok::XSRA) .Case("srl", tgtok::XSRL) diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h index e2e116b..a0818f9 100644 --- a/lib/TableGen/TGLexer.h +++ b/lib/TableGen/TGLexer.h @@ -46,7 +46,7 @@ namespace tgtok { MultiClass, String, // !keywords. - XConcat, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst, + XConcat, XADD, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst, XForEach, XHead, XTail, XEmpty, XIf, XEq, // Integer value. 
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 8ee3a7b..da0086a 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -912,6 +912,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { } case tgtok::XConcat: + case tgtok::XADD: case tgtok::XSRA: case tgtok::XSRL: case tgtok::XSHL: @@ -927,6 +928,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { switch (OpTok) { default: llvm_unreachable("Unhandled code!"); case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break; + case tgtok::XADD: Code = BinOpInit::ADD; Type = IntRecTy::get(); break; case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break; case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break; case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break; @@ -1142,6 +1144,7 @@ RecTy *TGParser::ParseOperatorType() { /// SimpleValue ::= '[' ValueList ']' /// SimpleValue ::= '(' IDValue DagArgList ')' /// SimpleValue ::= CONCATTOK '(' Value ',' Value ')' +/// SimpleValue ::= ADDTOK '(' Value ',' Value ')' /// SimpleValue ::= SHLTOK '(' Value ',' Value ')' /// SimpleValue ::= SRATOK '(' Value ',' Value ')' /// SimpleValue ::= SRLTOK '(' Value ',' Value ')' @@ -1397,6 +1400,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XEmpty: case tgtok::XCast: // Value ::= !unop '(' Value ')' case tgtok::XConcat: + case tgtok::XADD: case tgtok::XSRA: case tgtok::XSRL: case tgtok::XSHL: -- cgit v1.1 From 5928deaf2021b8fd6defa7138e15dc455e492316 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 25 Jan 2013 14:49:10 +0000 Subject: More cleanup of PPC register definitions. Uses the new !add TableGen operator to do more cleanup of the PPC register definitions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCRegisterInfo.td | 72 ++++------------------------------- 1 file changed, 8 insertions(+), 64 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 44665a6..8ee9b1e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -75,72 +75,16 @@ foreach Index = 0-31 in { } // Floating-point registers -def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>; -def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>; -def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>; -def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>; -def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>; -def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>; -def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>; -def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>; -def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>; -def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>; -def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>; -def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>; -def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>; -def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>; -def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>; -def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>; -def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>; -def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>; -def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>; -def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>; -def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>; -def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>; -def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>; -def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>; -def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>; -def F25 : FPR<25, 
"f25">, DwarfRegNum<[57, 57]>; -def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>; -def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>; -def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>; -def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>; -def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>; -def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>; +foreach Index = 0-31 in { + def F#Index : FPR, + DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; +} // Vector registers -def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>; -def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>; -def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>; -def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>; -def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>; -def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>; -def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>; -def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>; -def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>; -def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>; -def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>; -def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>; -def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>; -def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>; -def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>; -def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>; -def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>; -def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>; -def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>; -def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>; -def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>; -def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>; -def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>; -def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>; -def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>; -def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>; -def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>; -def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>; -def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>; -def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>; -def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>; -def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>; +foreach Index = 0-31 in { + def V#Index : VR, + DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; +} // Condition register bits def CR0LT : CRBIT< 0, "0">; -- cgit v1.1 From ea387fc3b8cf12c3c6ad218b81eca156e8173bba Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Fri, 25 Jan 2013 15:18:54 +0000 Subject: This patch aims to reduce compile time in LegalizeTypes by using SmallDenseMap, with an initial number of elements, instead of DenseMap, which has zero initial elements, in order to avoid the copying of elements when the size changes and to avoid allocating space every time LegalizeTypes is run. This patch will not affect the memory footprint, because DenseMap will increase the element size to 64 when the first element is added. Patch by Wan Xiaofei. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173448 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 16 ++++++++-------- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 724fdb9..7de42ea 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -80,35 +80,35 @@ private: /// PromotedIntegers - For integer nodes that are below legal width, this map /// indicates what promoted value to use. 
- DenseMap PromotedIntegers; + SmallDenseMap PromotedIntegers; /// ExpandedIntegers - For integer nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. - DenseMap > ExpandedIntegers; + SmallDenseMap, 8> ExpandedIntegers; /// SoftenedFloats - For floating point nodes converted to integers of /// the same size, this map indicates the converted value to use. - DenseMap SoftenedFloats; + SmallDenseMap SoftenedFloats; /// ExpandedFloats - For float nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. - DenseMap > ExpandedFloats; + SmallDenseMap, 8> ExpandedFloats; /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the /// scalar value of type 'ty' to use. - DenseMap ScalarizedVectors; + SmallDenseMap ScalarizedVectors; /// SplitVectors - For nodes that need to be split this map indicates /// which operands are the expanded version of the input. - DenseMap > SplitVectors; + SmallDenseMap, 8> SplitVectors; /// WidenedVectors - For vector nodes that need to be widened, indicates /// the widened value to use. - DenseMap WidenedVectors; + SmallDenseMap WidenedVectors; /// ReplacedValues - For values that have been replaced with another, /// indicates the replacement value to use. - DenseMap ReplacedValues; + SmallDenseMap ReplacedValues; /// Worklist - This defines a worklist of nodes to process. In order to be /// pushed onto this worklist, all operands of a node must have already been diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3989295..5d547ed 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -40,7 +40,7 @@ class VectorLegalizer { /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. - DenseMap LegalizedNodes; + SmallDenseMap LegalizedNodes; // Adds a node to the translation cache void AddLegalizedOperand(SDValue From, SDValue To) { -- cgit v1.1 From 351f65d9723c075af86466c07a0a3dc28be272cd Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 25 Jan 2013 15:31:10 +0000 Subject: [msan] Implement exact shadow propagation for relational ICmp. Only for integers, pointers, and vectors of those. No floats. Instrumentation seems very heavy, and may need to be replaced with some approximation in the future. 
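[Editorial note] The interval reasoning is easier to see outside of IRBuilder calls. A plain C++ sketch for the unsigned case (ultIsDefined is an ad hoc name; the instrumentation emits the equivalent IR):

    #include <cstdint>

    // A is a value and Sa its shadow: a set bit in Sa marks the
    // corresponding bit of A as uninitialized.
    static bool ultIsDefined(uint32_t A, uint32_t Sa, uint32_t B, uint32_t Sb) {
      uint32_t A0 = A & ~Sa, A1 = A | Sa; // possible values of A: [A0, A1]
      uint32_t B0 = B & ~Sb, B1 = B | Sb; // possible values of B: [B0, B1]
      // (A < B) is fully defined iff it comes out the same across the whole
      // interval of possible values: (A0 < B1) == (A1 < B0).
      return (A0 < B1) == (A1 < B0);
    }

For the signed case the shadow's sign bit is split off and treated in the opposite direction, as getLowestPossibleValue/getHighestPossibleValue do below.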
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173452 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 40f0ebb..64882c2 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -127,6 +127,10 @@ static cl::opt ClHandleICmp("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), cl::Hidden, cl::init(true)); +static cl::opt ClHandleICmpExact("msan-handle-icmp-exact", + cl::desc("exact handling of relational integer ICmp"), + cl::Hidden, cl::init(true)); + static cl::opt ClStoreCleanOrigin("msan-store-clean-origin", cl::desc("store origin for clean (fully initialized) values"), cl::Hidden, cl::init(false)); @@ -1155,6 +1159,70 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + /// \brief Build the lowest possible value of V, taking into account V's + /// uninitialized bits. + Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa, + bool isSigned) { + if (isSigned) { + // Split shadow into sign bit and other bits. + Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1); + Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits); + // Maximise the undefined shadow bit, minimize other undefined bits. + return + IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit); + } else { + // Minimize undefined bits. + return IRB.CreateAnd(A, IRB.CreateNot(Sa)); + } + } + + /// \brief Build the highest possible value of V, taking into account V's + /// uninitialized bits. + Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa, + bool isSigned) { + if (isSigned) { + // Split shadow into sign bit and other bits. + Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1); + Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits); + // Minimise the undefined shadow bit, maximise other undefined bits. + return + IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits); + } else { + // Maximize undefined bits. + return IRB.CreateOr(A, Sa); + } + } + + /// \brief Instrument relational comparisons. + /// + /// This function does exact shadow propagation for all relational + /// comparisons of integers, pointers and vectors of those. + /// FIXME: output seems suboptimal when one of the operands is a constant + void handleRelationalComparisonExact(ICmpInst &I) { + IRBuilder<> IRB(&I); + Value *A = I.getOperand(0); + Value *B = I.getOperand(1); + Value *Sa = getShadow(A); + Value *Sb = getShadow(B); + + // Get rid of pointers and vectors of pointers. + // For ints (and vectors of ints), types of A and Sa match, + // and this is a no-op. + A = IRB.CreatePointerCast(A, Sa->getType()); + B = IRB.CreatePointerCast(B, Sb->getType()); + + bool IsSigned = I.isSigned(); + Value *S1 = IRB.CreateICmp(I.getPredicate(), + getLowestPossibleValue(IRB, A, Sa, IsSigned), + getHighestPossibleValue(IRB, B, Sb, IsSigned)); + Value *S2 = IRB.CreateICmp(I.getPredicate(), + getHighestPossibleValue(IRB, A, Sa, IsSigned), + getLowestPossibleValue(IRB, B, Sb, IsSigned)); + Value *Si = IRB.CreateXor(S1, S2); + setShadow(&I, Si); + setOriginForNaryOp(I); + } + /// \brief Instrument signed relational comparisons. 
/// /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by @@ -1186,6 +1254,8 @@ struct MemorySanitizerVisitor : public InstVisitor { void visitICmpInst(ICmpInst &I) { if (ClHandleICmp && I.isEquality()) handleEqualityComparison(I); + else if (ClHandleICmp && ClHandleICmpExact && I.isRelational()) + handleRelationalComparisonExact(I); else if (ClHandleICmp && I.isSigned() && I.isRelational()) handleSignedRelationalComparison(I); else -- cgit v1.1 From 94d1f5b10cbc65d12fc8eb46fd36a2f407cf4a35 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 25 Jan 2013 15:35:29 +0000 Subject: [msan] A comment on ICmp handling logic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 64882c2..a329dcc 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1211,6 +1211,9 @@ struct MemorySanitizerVisitor : public InstVisitor { A = IRB.CreatePointerCast(A, Sa->getType()); B = IRB.CreatePointerCast(B, Sb->getType()); + // Let [a0, a1] be the interval of possible values of A, taking into account + // its undefined bits. Let [b0, b1] be the interval of possible values of B. + // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0). bool IsSigned = I.isSigned(); Value *S1 = IRB.CreateICmp(I.getPredicate(), getLowestPossibleValue(IRB, A, Sa, IsSigned), -- cgit v1.1 From f86c9321fc8088a021598df0265c5fd00ab4170d Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 25 Jan 2013 15:35:56 +0000 Subject: Fix MSVC 2012 warning about a 32-bit shift that should be 64-bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173454 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8ee7057..cbbf484 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -524,12 +524,12 @@ bool AttributeImpl::hasAttributes() const { uint64_t AttributeImpl::getAlignment() const { uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); - return 1U << ((Mask >> 16) - 1); + return 1ULL << ((Mask >> 16) - 1); } uint64_t AttributeImpl::getStackAlignment() const { uint64_t Mask = Raw() & getAttrMask(Attribute::StackAlignment); - return 1U << ((Mask >> 26) - 1); + return 1ULL << ((Mask >> 26) - 1); } void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, -- cgit v1.1 From bd7561ea29649625775ef814dded2856d91c4dcf Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 25 Jan 2013 17:01:00 +0000 Subject: APFloat: Make sure that we get a well-formed x87 NaN when converting from a smaller type. Fixes PR15054. 
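[Editorial note] A quick way to see what the fix guarantees (a hedged sketch, not a regression test from the tree; header paths and the exact APFloat spelling vary by LLVM version):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    void checkConvertedNanIsWellFormed() {
      APFloat Nan = APFloat::getNaN(APFloat::IEEEsingle);
      bool LosesInfo;
      Nan.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      // In the 80-bit x87 layout, bit 63 is the explicit integer bit; a NaN
      // with that bit clear is a "pseudo-NaN" the hardware rejects.
      APInt Bits = Nan.bitcastToAPInt();
      assert(Bits[63] && "converted NaN should have the integer bit set");
    }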
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/APFloat.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 686f91b..5b68fbb 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1913,6 +1913,12 @@ APFloat::convert(const fltSemantics &toSemantics, *losesInfo = (fs != opOK); } else if (category == fcNaN) { *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; + + // For x87 extended precision, we want to make a NaN, not a special NaN if + // the input wasn't special either. + if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significandParts(), semantics->precision - 1); + // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) // does not give you back the same bits. This is dubious, and we // don't currently do it. You're really supposed to get -- cgit v1.1 From f8c2f90bb3c70ab1830b44da990bda499f1e65d1 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 25 Jan 2013 18:34:09 +0000 Subject: Refactor some code to use the IRBuilder. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 41 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0996b7b..0302bbf 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -941,29 +941,30 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, } } + IRBuilder<> ChkBuilder(Loc->getContext()); + ChkBuilder.SetInsertPoint(Loc); + for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { Instruction::CastOps Op = Instruction::BitCast; - Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc); - Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc); - Value *End0 = CastInst::Create(Op, Ends[i], PtrArithTy, "bc", Loc); - Value *End1 = CastInst::Create(Op, Ends[j], PtrArithTy, "bc", Loc); - - Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Start0, End1, "bound0", Loc); - Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Start1, End0, "bound1", Loc); - Instruction *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, - Cmp1, "found.conflict", - Loc); - if (MemoryRuntimeCheck) - MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or, - MemoryRuntimeCheck, - IsConflict, - "conflict.rdx", Loc); - else - MemoryRuntimeCheck = IsConflict; - + Value *Start0 = ChkBuilder.CreateCast(Op, Starts[i], PtrArithTy, "bc"); + Value *Start1 = ChkBuilder.CreateCast(Op, Starts[j], PtrArithTy, "bc"); + Value *End0 = ChkBuilder.CreateCast(Op, Ends[i], PtrArithTy, "bc"); + Value *End1 = ChkBuilder.CreateCast(Op, Ends[j], PtrArithTy, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmp(CmpInst::ICMP_ULE, + Start0, End1, "bound0"); + Value *Cmp1 = ChkBuilder.CreateICmp(CmpInst::ICMP_ULE, + Start1, End0, "bound1"); + Value *IsConflict = ChkBuilder.CreateBinOp(Instruction::And, Cmp0, Cmp1, + "found.conflict"); + if (MemoryRuntimeCheck) { + Value *B = ChkBuilder.CreateBinOp(Instruction::Or, MemoryRuntimeCheck, + IsConflict, "conflict.rdx"); + MemoryRuntimeCheck = cast(B); + } else { + MemoryRuntimeCheck = cast(IsConflict); + } } } -- cgit 
v1.1 From 1275708f2980918230eb0e9dec78890de08a94f4 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 25 Jan 2013 19:26:23 +0000 Subject: LoopVectorizer: Refactor more code to use the IRBuilder. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173471 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 49 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0302bbf..f53f57c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1051,10 +1051,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ScalarPH = MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); - // This is the location in which we add all of the logic for bypassing - // the new vector loop. - Instruction *Loc = BypassBlock->getTerminator(); - // Use this IR builder to create the loop instructions (Phi, Br, Cmp) // inside the loop. Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); @@ -1065,6 +1061,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // times the unroll factor (num of SIMD instructions). Constant *Step = ConstantInt::get(IdxTy, VF * UF); + // This is the IR builder that we use to add all of the logic for bypassing + // the new vector loop. + IRBuilder<> BypassBuilder(OldBasicBlock->getContext()); + BypassBuilder.SetInsertPoint(BypassBlock->getTerminator()); + // We may need to extend the index in case there is a type mismatch. // We know that the count starts at zero and does not overflow. unsigned IdxTyBW = IdxTy->getScalarSizeInBits(); @@ -1072,36 +1073,36 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // The exit count can be of pointer type. Convert it to the correct // integer type. if (ExitCount->getType()->isPointerTy()) - Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc); - else if (IdxTyBW < Count->getType()->getScalarSizeInBits()) - Count = CastInst::CreateTruncOrBitCast(Count, IdxTy, "tr.cnt", Loc); + Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int"); else - Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc); + Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast"); } // Add the start index to the loop count to get the new end index. - Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc); + Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx"); // Now we need to generate the expression for N - (N % VF), which is // the part that the vectorized body will execute. - Value *R = BinaryOperator::CreateURem(Count, Step, "n.mod.vf", Loc); - Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc); - Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx, - "end.idx.rnd.down", Loc); + Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf"); + Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec"); + Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx, + "end.idx.rnd.down"); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. 
- Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - IdxEndRoundDown, - StartIdx, - "cmp.zero", Loc); + Value *Cmp = BypassBuilder.CreateICmp(CmpInst::ICMP_EQ, IdxEndRoundDown, + StartIdx, "cmp.zero"); + + BasicBlock *LastBypassBlock = BypassBlock; // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. - if (Instruction *MemoryRuntimeCheck = addRuntimeCheck(Legal, Loc)) { + Instruction *MemRuntimeCheck = addRuntimeCheck(Legal, + BypassBlock->getTerminator()); + if (MemRuntimeCheck) { // Create a new block containing the memory check. - BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemoryRuntimeCheck, + BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck, "vector.memcheck"); LoopBypassBlocks.push_back(CheckBlock); @@ -1111,13 +1112,13 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm); OldTerm->eraseFromParent(); - Cmp = MemoryRuntimeCheck; - assert(Loc == CheckBlock->getTerminator()); + Cmp = MemRuntimeCheck; + LastBypassBlock = CheckBlock; } - BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); - // Remove the old terminator. - Loc->eraseFromParent(); + LastBypassBlock->getTerminator()->eraseFromParent(); + BranchInst::Create(MiddleBlock, VectorPH, Cmp, + LastBypassBlock); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the -- cgit v1.1 From 3f16858579fdf7ee1cc6823736c68cede6643da1 Mon Sep 17 00:00:00 2001 From: Pedro Artigas Date: Fri, 25 Jan 2013 19:41:03 +0000 Subject: added ability to dynamically change the ExportList of an already created InternalizePass (useful for pass reuse) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173474 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 70d55b0..4bfab5b 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -50,6 +50,8 @@ namespace { explicit InternalizePass(); explicit InternalizePass(ArrayRef exportList); void LoadFile(const char *Filename); + void ClearExportList(); + void AddToExportList(const std::string &val); virtual bool runOnModule(Module &M); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -97,6 +99,14 @@ void InternalizePass::LoadFile(const char *Filename) { } } +void InternalizePass::ClearExportList() { + ExternalNames.clear(); +} + +void InternalizePass::AddToExportList(const std::string &val) { + ExternalNames.insert(val); +} + bool InternalizePass::runOnModule(Module &M) { CallGraph *CG = getAnalysisIfAvailable(); CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; -- cgit v1.1 From 5ac70a3804148749a15edf6113867a8bdae12974 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 25 Jan 2013 19:43:15 +0000 Subject: LoopVectorize: Simplify code. No functionality change. 
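The new ClearExportList/AddToExportList hooks added to InternalizePass above let one pass object be reconfigured between runs. Here is a minimal sketch of that reuse pattern using only standard-library types; FakeInternalize and its methods are illustrative stand-ins, not LLVM API.

    #include <cassert>
    #include <set>
    #include <string>

    // Mirrors the InternalizePass change: the export list can be cleared
    // and repopulated so one pass object serves several modules.
    class FakeInternalize {
      std::set<std::string> ExternalNames;
    public:
      void ClearExportList() { ExternalNames.clear(); }
      void AddToExportList(const std::string &Val) { ExternalNames.insert(Val); }
      bool keepsExternal(const std::string &Sym) const {
        return ExternalNames.count(Sym) != 0;
      }
    };

    int main() {
      FakeInternalize P;
      P.AddToExportList("main");
      assert(P.keepsExternal("main"));
      P.ClearExportList();            // reuse the same object for the next module
      P.AddToExportList("entry");
      assert(!P.keepsExternal("main") && P.keepsExternal("entry"));
      return 0;
    }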
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173475 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 43 ++++++++++++------------------ 1 file changed, 17 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f53f57c..060b06a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -941,30 +941,23 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, } } - IRBuilder<> ChkBuilder(Loc->getContext()); - ChkBuilder.SetInsertPoint(Loc); + IRBuilder<> ChkBuilder(Loc); for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { - Instruction::CastOps Op = Instruction::BitCast; - Value *Start0 = ChkBuilder.CreateCast(Op, Starts[i], PtrArithTy, "bc"); - Value *Start1 = ChkBuilder.CreateCast(Op, Starts[j], PtrArithTy, "bc"); - Value *End0 = ChkBuilder.CreateCast(Op, Ends[i], PtrArithTy, "bc"); - Value *End1 = ChkBuilder.CreateCast(Op, Ends[j], PtrArithTy, "bc"); - - Value *Cmp0 = ChkBuilder.CreateICmp(CmpInst::ICMP_ULE, - Start0, End1, "bound0"); - Value *Cmp1 = ChkBuilder.CreateICmp(CmpInst::ICMP_ULE, - Start1, End0, "bound1"); - Value *IsConflict = ChkBuilder.CreateBinOp(Instruction::And, Cmp0, Cmp1, - "found.conflict"); - if (MemoryRuntimeCheck) { - Value *B = ChkBuilder.CreateBinOp(Instruction::Or, MemoryRuntimeCheck, - IsConflict, "conflict.rdx"); - MemoryRuntimeCheck = cast(B); - } else { - MemoryRuntimeCheck = cast(IsConflict); - } + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + if (MemoryRuntimeCheck) + IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, + "conflict.rdx"); + + MemoryRuntimeCheck = cast(IsConflict); } } @@ -1063,12 +1056,10 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // This is the IR builder that we use to add all of the logic for bypassing // the new vector loop. - IRBuilder<> BypassBuilder(OldBasicBlock->getContext()); - BypassBuilder.SetInsertPoint(BypassBlock->getTerminator()); + IRBuilder<> BypassBuilder(BypassBlock->getTerminator()); // We may need to extend the index in case there is a type mismatch. // We know that the count starts at zero and does not overflow. - unsigned IdxTyBW = IdxTy->getScalarSizeInBits(); if (Count->getType() != IdxTy) { // The exit count can be of pointer type. Convert it to the correct // integer type. @@ -1090,8 +1081,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. 
- Value *Cmp = BypassBuilder.CreateICmp(CmpInst::ICMP_EQ, IdxEndRoundDown, - StartIdx, "cmp.zero"); + Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, + "cmp.zero"); BasicBlock *LastBypassBlock = BypassBlock; -- cgit v1.1 From 907bfd740a1c6f5403bd125bc32493f2bfbf5da7 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Fri, 25 Jan 2013 20:11:26 +0000 Subject: Use correct mnemonic / instruction name for ldivu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 65dbaef..92e7ec5 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -495,10 +495,9 @@ def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), "lsub $dst1, $dst2, $src1, $src2, $src3", []>; -def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ldiv $dst1, $dst2, $src1, $src2, $src3", - []>; +def LDIVU_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ldivu $dst1, $dst2, $src1, $src2, $src3", []>; // Six operand long -- cgit v1.1 From f5c36489210cb17f786cee598b94bb3dc582ef46 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Fri, 25 Jan 2013 20:16:00 +0000 Subject: Fix order of operands for l5r instructions. With this change the operands order matches the order in which the operands are encoded in the instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173477 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelLowering.cpp | 34 +++++++++++++++++++--------------- lib/Target/XCore/XCoreInstrInfo.td | 6 +++--- 2 files changed, 22 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6e894ac..af8e4cb 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -736,13 +736,13 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const unsigned Opcode = (N->getOpcode() == ISD::ADD) ? 
XCoreISD::LADD : XCoreISD::LSUB; SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), - LHSL, RHSL, Zero); - SDValue Lo(Carry.getNode(), 1); + SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + LHSL, RHSL, Zero); + SDValue Carry(Lo.getNode(), 1); - SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), - LHSH, RHSH, Carry); - SDValue Hi(Ignored.getNode(), 1); + SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + LHSH, RHSH, Carry); + SDValue Ignored(Hi.getNode(), 1); // Merge the pieces return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } @@ -1353,13 +1353,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2, DAG.getConstant(1, VT)); - SDValue Ops [] = { Carry, Result }; + SDValue Ops[] = { Result, Carry }; return DAG.getMergeValues(Ops, 2, dl); } // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1367,7 +1367,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if ((KnownZero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); - SDValue Ops [] = { Carry, Result }; + SDValue Ops[] = { Result, Carry }; return DAG.getMergeValues(Ops, 2, dl); } } @@ -1391,14 +1391,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), N2); - SDValue Ops [] = { Borrow, Result }; + SDValue Ops[] = { Result, Borrow }; return DAG.getMergeValues(Ops, 2, dl); } } // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1406,7 +1406,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if ((KnownZero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); - SDValue Ops [] = { Borrow, Result }; + SDValue Ops[] = { Result, Borrow }; return DAG.getMergeValues(Ops, 2, dl); } } @@ -1432,11 +1432,15 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // If the high result is unused fold to add(a, b) if (N->hasNUsesOfValue(0, 0)) { SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3); - SDValue Ops [] = { Lo, Lo }; + SDValue Ops[] = { Lo, Lo }; return DAG.getMergeValues(Ops, 2, dl); } // Otherwise fold to ladd(a, b, 0) - return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); + SDValue Result = + DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); + SDValue Carry(Result.getNode(), 1); + SDValue Ops[] = { Carry, Result }; + return DAG.getMergeValues(Ops, 2, dl); } } break; @@ -1530,7 +1534,7 @@ void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, default: break; case XCoreISD::LADD: case XCoreISD::LSUB: - if (Op.getResNo() == 0) { + if (Op.getResNo() == 1) { // Top bits of carry / borrow are clear. 
KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), KnownZero.getBitWidth() - 1); diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 92e7ec5..e6e434c 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -487,17 +487,17 @@ def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ladd $dst1, $dst2, $src1, $src2, $src3", + "ladd $dst2, $dst1, $src1, $src2, $src3", []>; def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "lsub $dst1, $dst2, $src1, $src2, $src3", + "lsub $dst2, $dst1, $src1, $src2, $src3", []>; def LDIVU_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ldivu $dst1, $dst2, $src1, $src2, $src3", []>; + "ldivu $dst1, $dst2, $src3, $src1, $src2", []>; // Six operand long -- cgit v1.1 From 3b6a5eefe0ab2199bc69094b390b736ae332b905 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Fri, 25 Jan 2013 20:20:07 +0000 Subject: Add instruction encodings / disassembly support for l5r instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173479 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 39 ++++++++++++++++++++++ lib/Target/XCore/XCoreInstrFormats.td | 7 +++- lib/Target/XCore/XCoreInstrInfo.td | 21 ++++++------ 3 files changed, 55 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 73aeb9c..e785030 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -175,6 +175,11 @@ static DecodeStatus DecodeL6RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeL5RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -597,6 +602,40 @@ DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + // Try and decode as a L6R instruction. 
+ Inst.clear(); + unsigned Opcode = fieldFromInstruction(Insn, 27, 5); + switch (Opcode) { + case 0x00: + Inst.setOpcode(XCore::LMUL_l6r); + return DecodeL6RInstruction(Inst, Insn, Address, Decoder); + } + return MCDisassembler::Fail; +} + +static DecodeStatus +DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3, Op4, Op5; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S != MCDisassembler::Success) + return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder); + S = Decode2OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5); + if (S != MCDisassembler::Success) + return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder); + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index fa360a7..6240362 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -222,8 +222,13 @@ class _L4R pattern> : InstXCore<4, outs, ins, asmstr, pattern> { } -class _L5R pattern> +class _FL5R opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{5-1}; + let Inst{20} = opc{0}; + let Inst{15-11} = 0b11111; + + let DecoderMethod = "DecodeL5RInstruction"; } class _FL6R opc, dag outs, dag ins, string asmstr, list pattern> diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index e6e434c..1810a13 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -485,19 +485,18 @@ def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), // Five operand long -def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ladd $dst2, $dst1, $src1, $src2, $src3", - []>; - -def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "lsub $dst2, $dst1, $src1, $src2, $src3", - []>; +def LADD_l5r : _FL5R<0b000001, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ladd $dst2, $dst1, $src1, $src2, $src3", + []>; -def LDIVU_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), +def LSUB_l5r : _FL5R<0b000010, (outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ldivu $dst1, $dst2, $src3, $src1, $src2", []>; + "lsub $dst2, $dst1, $src1, $src2, $src3", []>; + +def LDIVU_l5r : _FL5R<0b000000, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ldivu $dst1, $dst2, $src3, $src1, $src2", []>; // Six operand long -- cgit v1.1 From 939a4e8b693820d161f362317f7dba9057e66cc7 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 25 Jan 2013 20:26:43 +0000 Subject: Add command-line flags for DWARF dumping. Flags for dumping specific DWARF sections added in lib/DebugInfo and llvm-dwarfdump. 
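The L5R decoder above repeatedly calls fieldFromInstruction to carve opcode and operand fields out of the 32-bit word. A standalone sketch of that helper's behavior follows; the field positions match the calls above, but the sample word is made up and is not a real XCore encoding.

    #include <cstdint>
    #include <cstdio>

    // Extract Size bits of Insn starting at bit Start, as the
    // fieldFromInstruction calls above do. Assumes Size < 32.
    static unsigned fieldFrom(uint32_t Insn, unsigned Start, unsigned Size) {
      return (Insn >> Start) & ((1u << Size) - 1u);
    }

    int main() {
      uint32_t Insn = 0xF87FF123; // arbitrary sample word
      unsigned Lo16 = fieldFrom(Insn, 0, 16);  // tried as the 3-operand half
      unsigned Hi16 = fieldFrom(Insn, 16, 16); // tried as the 2-operand half
      unsigned Opc  = fieldFrom(Insn, 27, 5);  // bits checked by the Fail path
      printf("lo=0x%04x hi=0x%04x opc=0x%02x\n", Lo16, Hi16, Opc);
      return 0;
    }

As the decoder above reads, an XCore long instruction is two 16-bit halves: the low half is decoded as a 3-operand group and the high half as a 2-operand group.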
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFContext.cpp | 144 +++++++++++++++++++++++------------------ lib/DebugInfo/DWARFContext.h | 2 +- 2 files changed, 83 insertions(+), 63 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 13a527b..3995349 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -19,80 +19,100 @@ using namespace dwarf; typedef DWARFDebugLine::LineTable DWARFLineTable; -void DWARFContext::dump(raw_ostream &OS) { - OS << ".debug_abbrev contents:\n"; - getDebugAbbrev()->dump(OS); +void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { + if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) { + OS << ".debug_abbrev contents:\n"; + getDebugAbbrev()->dump(OS); + } - OS << "\n.debug_info contents:\n"; - for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) - getCompileUnitAtIndex(i)->dump(OS); + if (DumpType == DIDT_All || DumpType == DIDT_Info) { + OS << "\n.debug_info contents:\n"; + for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) + getCompileUnitAtIndex(i)->dump(OS); + } - OS << "\n.debug_aranges contents:\n"; - DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); uint32_t offset = 0; - DWARFDebugArangeSet set; - while (set.extract(arangesData, &offset)) - set.dump(OS); + if (DumpType == DIDT_All || DumpType == DIDT_Aranges) { + OS << "\n.debug_aranges contents:\n"; + DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); + DWARFDebugArangeSet set; + while (set.extract(arangesData, &offset)) + set.dump(OS); + } uint8_t savedAddressByteSize = 0; - OS << "\n.debug_line contents:\n"; - for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { - DWARFCompileUnit *cu = getCompileUnitAtIndex(i); - savedAddressByteSize = cu->getAddressByteSize(); - unsigned stmtOffset = - cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, - -1U); - if (stmtOffset != -1U) { - DataExtractor lineData(getLineSection(), isLittleEndian(), - savedAddressByteSize); - DWARFDebugLine::DumpingState state(OS); - DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); + if (DumpType == DIDT_All || DumpType == DIDT_Line) { + OS << "\n.debug_line contents:\n"; + for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { + DWARFCompileUnit *cu = getCompileUnitAtIndex(i); + savedAddressByteSize = cu->getAddressByteSize(); + unsigned stmtOffset = + cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, + -1U); + if (stmtOffset != -1U) { + DataExtractor lineData(getLineSection(), isLittleEndian(), + savedAddressByteSize); + DWARFDebugLine::DumpingState state(OS); + DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); + } + } + } + + if (DumpType == DIDT_All || DumpType == DIDT_Str) { + OS << "\n.debug_str contents:\n"; + DataExtractor strData(getStringSection(), isLittleEndian(), 0); + offset = 0; + uint32_t strOffset = 0; + while (const char *s = strData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", strOffset, s); + strOffset = offset; } } - OS << "\n.debug_str contents:\n"; - DataExtractor strData(getStringSection(), isLittleEndian(), 0); - offset = 0; - uint32_t strOffset = 0; - while (const char *s = strData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strOffset, s); - strOffset = offset; + if (DumpType == DIDT_All || DumpType == DIDT_Ranges) { + OS << "\n.debug_ranges contents:\n"; + // In 
fact, different compile units may have different address byte + // sizes, but for simplicity we just use the address byte size of the last + // compile unit (there is no easy and fast way to associate address range + // list and the compile unit it describes). + DataExtractor rangesData(getRangeSection(), isLittleEndian(), + savedAddressByteSize); + offset = 0; + DWARFDebugRangeList rangeList; + while (rangeList.extract(rangesData, &offset)) + rangeList.dump(OS); + } + + if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) { + OS << "\n.debug_abbrev.dwo contents:\n"; + getDebugAbbrevDWO()->dump(OS); } - OS << "\n.debug_ranges contents:\n"; - // In fact, different compile units may have different address byte - // sizes, but for simplicity we just use the address byte size of the last - // compile unit (there is no easy and fast way to associate address range - // list and the compile unit it describes). - DataExtractor rangesData(getRangeSection(), isLittleEndian(), - savedAddressByteSize); - offset = 0; - DWARFDebugRangeList rangeList; - while (rangeList.extract(rangesData, &offset)) - rangeList.dump(OS); - - OS << "\n.debug_abbrev.dwo contents:\n"; - getDebugAbbrevDWO()->dump(OS); - - OS << "\n.debug_info.dwo contents:\n"; - for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) - getDWOCompileUnitAtIndex(i)->dump(OS); - - OS << "\n.debug_str.dwo contents:\n"; - DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); - offset = 0; - uint32_t strDWOOffset = 0; - while (const char *s = strDWOData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); - strDWOOffset = offset; + if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) { + OS << "\n.debug_info.dwo contents:\n"; + for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) + getDWOCompileUnitAtIndex(i)->dump(OS); } - OS << "\n.debug_str_offsets.dwo contents:\n"; - DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); - offset = 0; - while (offset < getStringOffsetDWOSection().size()) { - OS << format("0x%8.8x: ", offset); - OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) { + OS << "\n.debug_str.dwo contents:\n"; + DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); + offset = 0; + uint32_t strDWOOffset = 0; + while (const char *s = strDWOData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); + strDWOOffset = offset; + } + } + + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { + OS << "\n.debug_str_offsets.dwo contents:\n"; + DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); + offset = 0; + while (offset < getStringOffsetDWOSection().size()) { + OS << format("0x%8.8x: ", offset); + OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + } } } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index 7da5c85..8a4a3af 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -45,7 +45,7 @@ class DWARFContext : public DIContext { public: DWARFContext() {} - virtual void dump(raw_ostream &OS); + virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All); /// Get the number of compile units in this context. 
unsigned getNumCompileUnits() { -- cgit v1.1 From a965baca3c7ce1ced00446cff1c6395d03dfed52 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 25 Jan 2013 20:53:41 +0000 Subject: When encountering an unknown file format, ObjectFile::createObjectFile should politely report it instead of running into llvm_unreachable. Also patch llvm-dwarfdump to actually check whether the file it's attempting to dump is a valid object file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/ObjectFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index b14df9a..860c87b 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -33,6 +33,8 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(), static_cast<unsigned>(Object->getBufferSize())); switch (type) { + case sys::Unknown_FileType: + return 0; case sys::ELF_Relocatable_FileType: case sys::ELF_Executable_FileType: case sys::ELF_SharedObject_FileType: @@ -52,7 +54,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { case sys::COFF_FileType: return createCOFFObjectFile(Object); default: - llvm_unreachable("Unknown Object File Type"); + llvm_unreachable("Unexpected Object File Type"); } } -- cgit v1.1 From 0f1bcedf5a871c360f2ca1354464d81cb81bdca7 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Fri, 25 Jan 2013 21:20:28 +0000 Subject: Fix order of operands for crc8_l4r The order in which operands appear in the encoded instruction is different to the order in which they appear in assembly. This changes the XCore backend to use the instruction encoding order. 
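With the createObjectFile change above, callers must be prepared for a null result on unrecognized input. A hedged sketch of the caller-side check the commit message describes for llvm-dwarfdump follows; the types and messages here are stand-ins, not the real llvm::object API.

    #include <cstdio>

    struct ObjectFile {};                    // stand-in for the real class

    // Stand-in factory: after the change above, an unknown format yields 0
    // instead of hitting llvm_unreachable.
    static ObjectFile *createObjectFile(const char *) { return 0; }

    static void dumpFile(const char *Path) {
      ObjectFile *Obj = createObjectFile(Path);
      if (!Obj) {
        fprintf(stderr, "%s: unknown file format\n", Path);
        return;
      }
      // ... hand Obj to a DWARF context and dump the requested sections ...
    }

    int main() { dumpFile("not-an-object.txt"); return 0; }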
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173493 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 13 +++----- lib/Target/XCore/XCoreISelLowering.cpp | 54 ++++++++++++++++++++++++---------- lib/Target/XCore/XCoreISelLowering.h | 4 +++ lib/Target/XCore/XCoreInstrInfo.td | 4 +-- 4 files changed, 48 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 472ce63..fbf86c5 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -211,15 +211,10 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, Ops, 4); } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - case Intrinsic::xcore_crc8: - SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - break; + case XCoreISD::CRC8: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32, + Ops, 3); } case ISD::BRIND: if (SDNode *ResNode = SelectBRIND(N)) diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index af8e4cb..59be84a 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -54,6 +54,7 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::LMUL : return "XCoreISD::LMUL"; case XCoreISD::MACCU : return "XCoreISD::MACCU"; case XCoreISD::MACCS : return "XCoreISD::MACCS"; + case XCoreISD::CRC8 : return "XCoreISD::CRC8"; case XCoreISD::BR_JT : return "XCoreISD::BR_JT"; case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; default : return NULL; @@ -152,6 +153,9 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); + // We want to custom lower some of our intrinsics. 
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; maxStoresPerMemmove = maxStoresPerMemmoveOptSize = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2; @@ -167,24 +171,25 @@ SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::BR_JT: return LowerBR_JT(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); - case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::VAARG: return LowerVAARG(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); + case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG); // FIXME: Remove these when LegalizeDAGTypes lands. case ISD::ADD: - case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); - case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); default: llvm_unreachable("unimplemented operand"); } @@ -858,6 +863,23 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); } +SDValue XCoreTargetLowering:: +LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + case Intrinsic::xcore_crc8: + EVT VT = Op.getValueType(); + SDValue Data = + DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT), + Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3)); + SDValue Crc(Data.getNode(), 1); + SDValue Results[] = { Crc, Data }; + return DAG.getMergeValues(Results, 2, DL); + } + return SDValue(); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 2874f00..6d430ef 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -63,6 +63,9 @@ namespace llvm { // Corresponds to MACCS instruction MACCS, + // 
Corresponds to CRC8 instruction + CRC8, + // Jumptable branch. BR_JT, @@ -147,6 +150,7 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 1810a13..89845bc 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -477,10 +477,10 @@ def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), []>; } -let Constraints = "$src1 = $dst1" in +let Constraints = "$src1 = $dst2" in def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "crc8 $dst1, $dst2, $src2, $src3", + "crc8 $dst2, $dst1, $src2, $src3", []>; // Five operand long -- cgit v1.1 From 1f375e5bc78647f9b29564eafdc907250ccd91ed Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Fri, 25 Jan 2013 21:25:12 +0000 Subject: Use the correct format in the STW / SETPSC instruction names. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173494 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 2 +- lib/Target/XCore/XCoreInstrInfo.td | 14 +++++++------- lib/Target/XCore/XCoreRegisterInfo.cpp | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index e785030..821c33d 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -401,7 +401,7 @@ DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, fieldFromInstruction(Insn, 27, 5) << 4; switch (Opcode) { case 0x0c: - Inst.setOpcode(XCore::STW_3r); + Inst.setOpcode(XCore::STW_l3r); return DecodeL3RInstruction(Inst, Insn, Address, Decoder); case 0x1c: Inst.setOpcode(XCore::XOR_l3r); diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 89845bc..b48a31d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -384,9 +384,9 @@ def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst), } let mayStore=1 in { -def STW_3r : _FL3R<0b000001100, (outs), - (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "stw $val, $addr[$offset]", []>; +def STW_l3r : _FL3R<0b000001100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "stw $val, $addr[$offset]", []>; def STW_2rus : _F2RUS<0b0000, (outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), @@ -783,9 +783,9 @@ def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val), "setd res[$r], $val", [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; -def SETPSC_l2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2), - "setpsc res[$src1], $src2", - [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; +def SETPSC_2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setpsc res[$src1], $src2", + [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r), "getst $dst, res[$r]", @@ -1006,7 +1006,7 @@ def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr), (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>; def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)), - (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; + 
(STW_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)), (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>; def : Pat<(store GRRegs:$val, GRRegs:$addr), diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index e637d9a..7e3e476 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -231,7 +231,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, .addReg(ScratchReg, RegState::Kill); break; case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_3r)) + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) .addReg(Reg, getKillRegState(isKill)) .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); -- cgit v1.1 From e1f95db4803a48a30fc2a1d5868281a87a36fb85 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 25 Jan 2013 21:30:53 +0000 Subject: Add an accessor method to get the slot's index. This will limit the use of AttributeWithIndex. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173495 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 4 ++++ lib/IR/Attributes.cpp | 6 ++++++ lib/IR/Verifier.cpp | 24 ++++++++++++------------ 3 files changed, 22 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index b35e5e0..ebd90e2 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -122,6 +122,10 @@ public: LLVMContext &getContext() { return Context; } ArrayRef getAttributes() const { return AttrList; } unsigned getNumAttributes() const { return AttrList.size(); } + unsigned getSlotIndex(unsigned Slot) const { + // FIXME: This needs to use AttrNodes instead. + return AttrList[Slot].Index; + } void Profile(FoldingSetNodeID &ID) const { Profile(ID, AttrList); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index cbbf484..b09d55d 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -682,6 +682,12 @@ unsigned AttributeSet::getNumSlots() const { return AttrList ? AttrList->getNumAttributes() : 0; } +unsigned AttributeSet::getSlotIndex(unsigned Slot) const { + assert(AttrList && Slot < AttrList->getNumAttributes() && + "Slot # out of range!"); + return AttrList->getSlotIndex(Slot); +} + /// getSlot - Return the AttributeWithIndex at the specified slot. This /// holds a number plus a set of attributes. const AttributeWithIndex &AttributeSet::getSlot(unsigned Slot) const { diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index b263bbe..9a482f1 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -718,25 +718,25 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, bool SawNest = false; for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - const AttributeWithIndex &Attr = Attrs.getSlot(i); + unsigned Index = Attrs.getSlotIndex(i); Type *Ty; - if (Attr.Index == 0) + if (Index == 0) Ty = FT->getReturnType(); - else if (Attr.Index-1 < FT->getNumParams()) - Ty = FT->getParamType(Attr.Index-1); + else if (Index-1 < FT->getNumParams()) + Ty = FT->getParamType(Index-1); else break; // VarArgs attributes, verified elsewhere. 
- VerifyParameterAttrs(Attrs, Attr.Index, Ty, Attr.Index == 0, V); + VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V); - if (Attrs.hasAttribute(Attr.Index, Attribute::Nest)) { + if (Attrs.hasAttribute(i, Attribute::Nest)) { Assert1(!SawNest, "More than one parameter has attribute nest!", V); SawNest = true; } - if (Attrs.hasAttribute(Attr.Index, Attribute::StructRet)) - Assert1(Attr.Index == 1, "Attribute sret is not on first parameter!", V); + if (Attrs.hasAttribute(Index, Attribute::StructRet)) + Assert1(Index == 1, "Attribute sret is not on first parameter!", V); } if (!Attrs.hasAttributes(AttributeSet::FunctionIndex)) @@ -801,12 +801,12 @@ static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) { return true; unsigned LastSlot = Attrs.getNumSlots() - 1; - unsigned LastIndex = Attrs.getSlot(LastSlot).Index; + unsigned LastIndex = Attrs.getSlotIndex(LastSlot); if (LastIndex <= Params - || (LastIndex == (unsigned)~0 - && (LastSlot == 0 || Attrs.getSlot(LastSlot - 1).Index <= Params))) + || (LastIndex == AttributeSet::FunctionIndex + && (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params))) return true; - + return false; } -- cgit v1.1 From b74564a15ac6bfa85c9597bed842b686be1a1a62 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Fri, 25 Jan 2013 21:44:27 +0000 Subject: Use const reference instead of vector copying. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173497 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 821a4f2..adf9a57 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -467,7 +467,8 @@ void ScheduleDAGMI::initRegPressure() { // Cache the list of excess pressure sets in this region. This will also track // the max pressure in the scheduled code for these sets. RegionCriticalPSets.clear(); - std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; + const std::vector<unsigned> &RegionPressure = + RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { unsigned Limit = TRI->getRegPressureSetLimit(i); DEBUG(dbgs() << TRI->getRegPressureSetName(i) -- cgit v1.1 From 85875647d6f85cbcbe506809640b9857eaaa26a4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 25 Jan 2013 21:46:52 +0000 Subject: Use the new 'getSlotIndex' method to retrieve the attribute's slot index. 
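The MachineScheduler tweak above is a pattern worth a tiny illustration: binding a large container to a const reference aliases the existing storage, whereas the old by-value declaration deep-copied it on every call. A minimal sketch with illustrative names:

    #include <numeric>
    #include <vector>

    int main() {
      std::vector<unsigned> MaxSetPressure(1024, 1);

      // Before: a full copy of the vector (allocation plus element copies).
      std::vector<unsigned> Copy = MaxSetPressure;

      // After: a const reference aliases the existing storage at no cost.
      const std::vector<unsigned> &Ref = MaxSetPressure;

      unsigned A = std::accumulate(Copy.begin(), Copy.end(), 0u);
      unsigned B = std::accumulate(Ref.begin(), Ref.end(), 0u);
      return A == B ? 0 : 1; // identical results, one allocation saved
    }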
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173499 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +- lib/IR/Attributes.cpp | 4 ++-- lib/Target/CppBackend/CPPBackend.cpp | 2 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 4 ++-- lib/Transforms/IPO/GlobalOpt.cpp | 2 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 4ee762e..cc6b8b3 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -174,7 +174,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE, const AttributeSet &A = Attrs[i]; for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { const AttributeWithIndex &PAWI = A.getSlot(i); - Record.push_back(PAWI.Index); + Record.push_back(A.getSlotIndex(i)); Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs)); } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index b09d55d..538d9fe 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -853,8 +853,8 @@ AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, void AttributeSet::dump() const { dbgs() << "PAL[ "; for (unsigned i = 0; i < getNumSlots(); ++i) { - const AttributeWithIndex &PAWI = getSlot(i); - dbgs() << "{ " << PAWI.Index << ", " << PAWI.Attrs.getAsString() << " } "; + unsigned Index = getSlotIndex(i); + dbgs() << "{ " << Index << " => " << getAsString(Index) << " } "; } dbgs() << "]\n"; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 50bfef5..2560c6e 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -473,7 +473,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL, Out << "SmallVector Attrs;"; nl(Out); Out << "AttributeWithIndex PAWI;"; nl(Out); for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { - unsigned index = PAL.getSlot(i).Index; + unsigned index = PAL.getSlotIndex(i); AttrBuilder attrs(PAL.getSlot(i).Attrs); Out << "PAWI.Index = " << index << "U;\n"; Out << " {\n AttrBuilder B;\n"; diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 3a38ca4..4603146 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -272,9 +272,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Drop any attributes that were on the vararg arguments. AttributeSet PAL = CS.getAttributes(); - if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) { + if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) { SmallVector AttributesVec; - for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) + for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) AttributesVec.push_back(AttributeWithIndex::get(Fn.getContext(), diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 6fe4316..52d4e2f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2072,7 +2072,7 @@ static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { continue; // There can be only one. 
- return Attrs.removeAttribute(C, Attrs.getSlot(i).Index, Attribute::Nest); + return Attrs.removeAttribute(C, Attrs.getSlotIndex(i), Attribute::Nest); } return Attrs; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2fd3549..8555c2f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1101,7 +1101,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // won't be dropping them. Check that these extra arguments have attributes // that are compatible with being a vararg call argument. for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) + if (CallerPAL.getSlotIndex(i - 1) <= FT->getNumParams()) break; Attribute PAttrs = CallerPAL.getSlot(i - 1).Attrs; // Check if it has an attribute that's incompatible with varargs. -- cgit v1.1 From c655c43ba54990535cf7f374ed78f4fd5bdee093 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 25 Jan 2013 21:47:42 +0000 Subject: LoopVectorize: Refactor the code that vectorizes loads/stores to remove duplication. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173500 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 258 +++++++++++++---------------- 1 file changed, 113 insertions(+), 145 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 060b06a..e260b58 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -187,6 +187,10 @@ private: /// of scalars. void scalarizeInstruction(Instruction *Instr); + /// Vectorize Load and Store instructions, + void vectorizeMemoryInstruction(Instruction *Instr, + LoopVectorizationLegality *Legal); + /// Create a broadcast instruction. This method generates a broadcast /// instruction (shuffle) for loop invariant values and for the induction /// value. If this is the induction variable then we extend it to N, N+1, ... @@ -832,6 +836,111 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) { "reverse"); } + +void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, + LoopVectorizationLegality *Legal) { + // Attempt to issue a wide load. + LoadInst *LI = dyn_cast(Instr); + StoreInst *SI = dyn_cast(Instr); + + assert((LI || SI) && "Invalid Load/Store instruction"); + + Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); + Type *DataTy = VectorType::get(ScalarDataTy, VF); + Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); + unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); + + // If the pointer is loop invariant or if it is non consecutive, + // scalarize the load. + int Stride = Legal->isConsecutivePtr(Ptr); + bool Reverse = Stride < 0; + bool UniformLoad = LI && Legal->isUniform(Ptr); + if (Stride == 0 || UniformLoad) + return scalarizeInstruction(Instr); + + Constant *Zero = Builder.getInt32(0); + VectorParts &Entry = WidenMap.get(Instr); + + // Handle consecutive loads/stores. + GetElementPtrInst *Gep = dyn_cast(Ptr); + if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + Value *PtrOperand = Gep->getPointerOperand(); + Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; + FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); + + // Create the new GEP with the new induction variable. 
+ GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(0, FirstBasePtr); + Gep2->setName("gep.indvar.base"); + Ptr = Builder.Insert(Gep2); + } else if (Gep) { + assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), + OrigLoop) && "Base ptr must be invariant"); + + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + + Value *LastGepOperand = Gep->getOperand(NumOperands - 1); + VectorParts &GEPParts = getVectorValue(LastGepOperand); + Value *LastIndex = GEPParts[0]; + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Gep2->setName("gep.indvar.idx"); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. + assert(isa(Ptr) && "Invalid induction ptr"); + VectorParts &PtrVal = getVectorValue(Ptr); + Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); + } + + // Handle Stores: + if (SI) { + assert(!Legal->isUniform(SI->getPointerOperand()) && + "We do not allow storing to uniform addresses"); + + VectorParts &StoredVal = getVectorValue(SI->getValueOperand()); + for (unsigned Part = 0; Part < UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); + + if (Reverse) { + // If we store to reverse consecutive memory locations then we need + // to reverse the order of elements in the stored value. + StoredVal[Part] = reverseVector(StoredVal[Part]); + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. + PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); + PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); + } + + Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); + Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); + } + } + + for (unsigned Part = 0; Part < UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); + + if (Reverse) { + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. + PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); + PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); + } + + Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); + Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); + cast(LI)->setAlignment(Alignment); + Entry[Part] = Reverse ? reverseVector(LI) : LI; + } +} + void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. @@ -1353,9 +1462,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // the cost-model. // //===------------------------------------------------===// - BasicBlock &BB = *OrigLoop->getHeader(); - Constant *Zero = - ConstantInt::get(IntegerType::getInt32Ty(BB.getContext()), 0); + Constant *Zero = Builder.getInt32(0); // In order to support reduction variables we need to be able to vectorize // Phi nodes. 
Phi nodes have cycles, so we need to vectorize them in two @@ -1592,8 +1699,6 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { void InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, PhiVector *PV) { - Constant *Zero = Builder.getInt32(0); - // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { VectorParts &Entry = WidenMap.get(it); @@ -1808,147 +1913,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, break; } - case Instruction::Store: { - // Attempt to issue a wide store. - StoreInst *SI = dyn_cast(it); - Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); - Value *Ptr = SI->getPointerOperand(); - unsigned Alignment = SI->getAlignment(); - - assert(!Legal->isUniform(Ptr) && - "We do not allow storing to uniform addresses"); - - - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (Stride == 0) { - scalarizeInstruction(it); + case Instruction::Store: + case Instruction::Load: + vectorizeMemoryInstruction(it, Legal); break; - } - - // Handle consecutive stores. - - GetElementPtrInst *Gep = dyn_cast(Ptr); - if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { - Value *PtrOperand = Gep->getPointerOperand(); - Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; - FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(0, FirstBasePtr); - Ptr = Builder.Insert(Gep2); - } else if (Gep) { - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); - - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - - Value *LastGepOperand = Gep->getOperand(NumOperands - 1); - VectorParts &GEPParts = getVectorValue(LastGepOperand); - Value *LastIndex = GEPParts[0]; - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - } else { - // Use the induction element ptr. - assert(isa(Ptr) && "Invalid induction ptr"); - VectorParts &PtrVal = getVectorValue(Ptr); - Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); - } - - VectorParts &StoredVal = getVectorValue(SI->getValueOperand()); - for (unsigned Part = 0; Part < UF; ++Part) { - // Calculate the pointer for the specific unroll-part. - Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); - - if (Reverse) { - // If we store to reverse consecutive memory locations then we need - // to reverse the order of elements in the stored value. - StoredVal[Part] = reverseVector(StoredVal[Part]); - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); - } - - Value *VecPtr = Builder.CreateBitCast(PartPtr, StTy->getPointerTo()); - Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); - } - break; - } - case Instruction::Load: { - // Attempt to issue a wide load. 
- LoadInst *LI = dyn_cast(it); - Type *RetTy = VectorType::get(LI->getType(), VF); - Value *Ptr = LI->getPointerOperand(); - unsigned Alignment = LI->getAlignment(); - - // If the pointer is loop invariant or if it is non consecutive, - // scalarize the load. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (Legal->isUniform(Ptr) || Stride == 0) { - scalarizeInstruction(it); - break; - } - - GetElementPtrInst *Gep = dyn_cast(Ptr); - if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { - Value *PtrOperand = Gep->getPointerOperand(); - Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; - FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(0, FirstBasePtr); - Ptr = Builder.Insert(Gep2); - } else if (Gep) { - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); - - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - - Value *LastGepOperand = Gep->getOperand(NumOperands - 1); - VectorParts &GEPParts = getVectorValue(LastGepOperand); - Value *LastIndex = GEPParts[0]; - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - } else { - // Use the induction element ptr. - assert(isa(Ptr) && "Invalid induction ptr"); - VectorParts &PtrVal = getVectorValue(Ptr); - Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); - } - - for (unsigned Part = 0; Part < UF; ++Part) { - // Calculate the pointer for the specific unroll-part. - Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); - - if (Reverse) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); - } - - Value *VecPtr = Builder.CreateBitCast(PartPtr, RetTy->getPointerTo()); - Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); - cast(LI)->setAlignment(Alignment); - Entry[Part] = Reverse ? reverseVector(LI) : LI; - } - break; - } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: -- cgit v1.1 From c47bd9899b639c3384268f871009259c2a94fba4 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Fri, 25 Jan 2013 21:55:32 +0000 Subject: Add instruction encodings / disassembly support for l4r instructions. 
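
The decoders added below recover operands by slicing bit fields out of the 32-bit
instruction word: the low 16 bits are handed to Decode3OpInstruction, and the
fourth operand comes from bits 16..19. A standalone sketch of that field
extraction; extractField is a hypothetical stand-in for LLVM's
fieldFromInstruction helper, and the instruction word is made up:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Stand-in for fieldFromInstruction(): return numBits bits of Insn starting
// at startBit. Assumes fields never span the full 32-bit width.
static uint32_t extractField(uint32_t Insn, unsigned startBit,
                             unsigned numBits) {
  assert(numBits < 32 && startBit + numBits <= 32 && "field out of range");
  return (Insn >> startBit) & ((1u << numBits) - 1);
}

int main() {
  uint32_t Insn = 0xf8123456;                 // hypothetical l4r encoding
  uint32_t Low16 = extractField(Insn, 0, 16); // fed to the 3-op decoder
  uint32_t Op4   = extractField(Insn, 16, 4); // fourth operand register
  std::printf("low16=0x%04x op4=%u\n", (unsigned)Low16, (unsigned)Op4);
  return 0;
}
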
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173501 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../XCore/Disassembler/XCoreDisassembler.cpp | 49 ++++++++++++++++++++++ lib/Target/XCore/XCoreInstrFormats.td | 23 +++++++++- lib/Target/XCore/XCoreInstrInfo.td | 29 ++++++------- 3 files changed, 83 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 821c33d..a94f5b9 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -180,6 +180,16 @@ static DecodeStatus DecodeL5RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -636,6 +646,45 @@ DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } +static DecodeStatus +DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + unsigned Op4 = fieldFromInstruction(Insn, 16, 4); + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + } + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + unsigned Op4 = fieldFromInstruction(Insn, 16, 4); + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + } + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(MCInst &instr, uint64_t &Size, diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 6240362..8dceb30 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -218,8 +218,29 @@ class _F0R opc, dag outs, dag ins, string asmstr, list pattern> let Inst{4-0} = opc{4-0}; } -class _L4R pattern> +class _FL4R opc, dag outs, dag ins, string asmstr, list pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<4> d; + + let Inst{31-27} = opc{5-1}; + let Inst{26-21} = 0b111111; + let Inst{20} = opc{0}; + let Inst{19-16} = d; + let Inst{15-11} = 0b11111; +} + +// L4R with 4th operand as both a source and a destination. 
+class _FL4RSrcDst opc, dag outs, dag ins, string asmstr,
+                  list pattern>
+  : _FL4R {
+  let DecoderMethod = "DecodeL4RSrcDstInstruction";
+}
+
+// L4R with 1st and 4th operand as both a source and a destination.
+class _FL4RSrcDstSrcDst opc, dag outs, dag ins, string asmstr,
+                        list pattern>
+  : _FL4R {
+  let DecoderMethod = "DecodeL4RSrcDstSrcDstInstruction";
}

class _FL5R opc, dag outs, dag ins, string asmstr, list pattern>
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index b48a31d..770b1fc 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -463,25 +463,20 @@ def ST8_l3r : _FL3R<0b100011100, (outs),
 }

 // Four operand long
-let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
-def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
-                     (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
-                      GRRegs:$src4),
-                     "maccu $dst1, $dst2, $src3, $src4",
-                     []>;
-
-def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
-                     (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
-                      GRRegs:$src4),
-                     "maccs $dst1, $dst2, $src3, $src4",
-                     []>;
+let Constraints = "$e = $a,$f = $b" in {
+def MACCU_l4r : _FL4RSrcDstSrcDst<
+  0b000001, (outs GRRegs:$a, GRRegs:$b),
+  (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccu $a, $b, $c, $d", []>;
+
+def MACCS_l4r : _FL4RSrcDstSrcDst<
+  0b000010, (outs GRRegs:$a, GRRegs:$b),
+  (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccs $a, $b, $c, $d", []>;
 }

-let Constraints = "$src1 = $dst2" in
-def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
-                    (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
-                    "crc8 $dst2, $dst1, $src2, $src3",
-                    []>;
+let Constraints = "$e = $b" in
+def CRC8_l4r : _FL4RSrcDst<0b000000, (outs GRRegs:$a, GRRegs:$b),
+                           (ins GRRegs:$e, GRRegs:$c, GRRegs:$d),
+                           "crc8 $b, $a, $c, $d", []>;

 // Five operand long
-- 
cgit v1.1


From a5597f0eaf1f93c6d0bc641a0cc54ecffb33955a Mon Sep 17 00:00:00 2001
From: Eli Bendersky 
Date: Fri, 25 Jan 2013 22:07:43 +0000
Subject: In this patch, we teach X86_64TargetMachine that it has an ILP32
 (defined by the x32 ABI) mode, in which case its pointers are 32 bits in
 size. This knowledge is also added to X86RegisterInfo, which now returns the
 appropriate registers in getPointerRegClass.

There are many outcomes to this change. In order to keep the patches
separate and manageable, we start by focusing on some simple testable
cases. The patch adds a test that passes a pointer to a function,
focusing on the difference between the two data models for x86-64.
Another test is added for handling of 'sret' arguments (and
functionality is added in X86ISelLowering to make it work).

A note on naming: the "x32 ABI" document refers to the AMD64
architecture (in LLVM it is distinguished by being is64Bits() in the x86
subtarget) with two variations: the LP64 (default) data model, and the
ILP32 data model. This patch adds predicates to the subtarget which are
consistent with this naming scheme.
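
As a rough standalone model of the predicate split described above, under the
assumption that only the triple's environment distinguishes x32 (which is what
the X86Subtarget change below encodes); ToySubtarget is invented for
illustration:

#include <cstdio>

// Toy model: x86_64 is "64-bit" under both data models; only the
// environment picks ILP32 (x32) vs. LP64, and with it the pointer width.
enum Environment { GNU, GNUX32 };

struct ToySubtarget {
  bool In64BitMode;
  Environment Env;

  bool is64Bit() const { return In64BitMode; }
  bool isTarget64BitILP32() const { return In64BitMode && Env == GNUX32; }
  bool isTarget64BitLP64() const { return In64BitMode && Env != GNUX32; }
};

int main() {
  ToySubtarget X32 = {true, GNUX32};
  ToySubtarget LP64 = {true, GNU};
  // Mirrors the data-layout choice in X86_64TargetMachine below: pointers
  // are 32 bits wide under ILP32 and 64 bits wide under LP64.
  std::printf("x32 pointer bits: %d\n", X32.isTarget64BitILP32() ? 32 : 64);
  std::printf("lp64 pointer bits: %d\n", LP64.isTarget64BitILP32() ? 32 : 64);
  return 0;
}
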
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173503 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++++++----------- lib/Target/X86/X86RegisterInfo.cpp | 9 +++++---- lib/Target/X86/X86Subtarget.h | 15 ++++++++++++++- lib/Target/X86/X86TargetMachine.cpp | 8 ++++++-- 4 files changed, 39 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0475814..0c12410 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1646,10 +1646,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. We saved the argument into - // a virtual register in the entry block, so now we copy the value out - // and into %rax. + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // We saved the argument into a virtual register in the entry block, + // so now we copy the value out and into %rax/%eax. if (Subtarget->is64Bit() && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { MachineFunction &MF = DAG.getMachineFunction(); @@ -1659,11 +1659,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX; + Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); - // RAX now acts like a return value. - MRI.addLiveOut(X86::RAX); + // RAX/EAX now acts like a return value. + MRI.addLiveOut(RetValReg); } RetOps[0] = Chain; // Update chain. @@ -2015,14 +2016,16 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(ArgValue); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. Save the argument into - // a virtual register so that we can access it from the return points. + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // Save the argument into a virtual register so that we can access it + // from the return points. if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { X86MachineFunctionInfo *FuncInfo = MF.getInfo(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + MVT PtrTy = getPointerTy(); + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 58064b8..1dd1e41 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -177,20 +177,21 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + const X86Subtarget &Subtarget = TM.getSubtarget(); switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. 
-    if (TM.getSubtarget().is64Bit())
+    if (Subtarget.isTarget64BitLP64())
       return &X86::GR64RegClass;
     return &X86::GR32RegClass;
   case 1: // Normal GPRs except the stack pointer (for encoding reasons).
-    if (TM.getSubtarget().is64Bit())
+    if (Subtarget.isTarget64BitLP64())
       return &X86::GR64_NOSPRegClass;
     return &X86::GR32_NOSPRegClass;
   case 2: // Available for tailcall (not callee-saved GPRs).
-    if (TM.getSubtarget().isTargetWin64())
+    if (Subtarget.isTargetWin64())
       return &X86::GR64_TCW64RegClass;
-    if (TM.getSubtarget().is64Bit())
+    else if (Subtarget.is64Bit())
       return &X86::GR64_TCRegClass;

     const Function *F = MF.getFunction();
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 080f4cf..b325f62 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -194,7 +194,20 @@ public:
   /// instruction.
   void AutoDetectSubtargetFeatures();

-  bool is64Bit() const { return In64BitMode; }
+  /// Is this x86_64? (disregarding specific ABI / programming model)
+  bool is64Bit() const {
+    return In64BitMode;
+  }
+
+  /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
+  bool isTarget64BitILP32() const {
+    return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32);
+  }
+
+  /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
+  bool isTarget64BitLP64() const {
+    return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+  }

   PICStyles::Style getPICStyle() const { return PICStyle; }
   void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 799f140..8aa58a2 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -59,8 +59,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
                                          Reloc::Model RM, CodeModel::Model CM,
                                          CodeGenOpt::Level OL)
   : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
-    DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
-       "n8:16:32:64-S128"),
+    // The x32 ABI dictates the ILP32 programming model for x64.
+    DL(getSubtargetImpl()->isTarget64BitILP32() ?
+        "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+        "n8:16:32:64-S128" :
+        "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+        "n8:16:32:64-S128"),
     InstrInfo(*this),
     TLInfo(*this),
     TSInfo(*this),
-- 
cgit v1.1


From ee7c0d2f931590ccdc53a14b1839e11bb29fc96e Mon Sep 17 00:00:00 2001
From: Andrew Kaylor 
Date: Fri, 25 Jan 2013 22:50:58 +0000
Subject: Add support for applying in-memory relocations to the .debug_line
 section and, in the case of ELF files, using symbol addresses when available
 for relocations to the .debug_info section. This patch also extends the
 llvm-rtdyld tool with the ability to dump line number information for
 testing purposes.
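
A sketch of why the ELF path in this patch fetches the symbol address: absolute
relocations in the R_X86_64_64 style resolve to symbol address plus addend, so
the value cannot be computed from the relocation entry alone. The types and
names here are invented for illustration; the real code goes through
object::RelocVisitor:

#include <cstdint>
#include <cstdio>

struct ToyReloc {
  uint64_t Offset; // where in the section the fixup lands
  int64_t Addend;  // ELF rela addend
};

// R_X86_64_64-style absolute relocation: S + A.
static uint64_t applyAbs64(const ToyReloc &R, uint64_t SymAddr) {
  return SymAddr + static_cast<uint64_t>(R.Addend);
}

int main() {
  ToyReloc R = {0x10, 8};
  uint64_t SymAddr = 0x400000; // looked up from the symbol table
  std::printf("fixup at +0x%llx -> 0x%llx\n",
              (unsigned long long)R.Offset,
              (unsigned long long)applyAbs64(R, SymAddr));
  return 0;
}
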
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173517 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFCompileUnit.h | 2 +- lib/DebugInfo/DWARFContext.cpp | 18 ++++++++++++++---- lib/DebugInfo/DWARFContext.h | 3 +++ lib/DebugInfo/DWARFDebugLine.cpp | 15 ++++++++++++--- lib/DebugInfo/DWARFDebugLine.h | 4 ++++ lib/DebugInfo/DWARFRelocMap.h | 21 +++++++++++++++++++++ lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h | 2 ++ 7 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 lib/DebugInfo/DWARFRelocMap.h (limited to 'lib') diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index de70b2e..2a74605 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -13,6 +13,7 @@ #include "DWARFDebugAbbrev.h" #include "DWARFDebugInfoEntry.h" #include "DWARFDebugRangeList.h" +#include "DWARFRelocMap.h" #include namespace llvm { @@ -20,7 +21,6 @@ namespace llvm { class DWARFDebugAbbrev; class StringRef; class raw_ostream; -typedef DenseMap > RelocAddrMap; class DWARFCompileUnit { const DWARFDebugAbbrev *Abbrev; diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 3995349..66d299b 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -53,7 +53,7 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { DataExtractor lineData(getLineSection(), isLittleEndian(), savedAddressByteSize); DWARFDebugLine::DumpingState state(OS); - DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); + DWARFDebugLine::parseStatementTable(lineData, &lineRelocMap(), &stmtOffset, state); } } } @@ -155,7 +155,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() { const DWARFLineTable * DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { if (!Line) - Line.reset(new DWARFDebugLine()); + Line.reset(new DWARFDebugLine(&lineRelocMap())); unsigned stmtOffset = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, @@ -422,12 +422,15 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : else continue; - // TODO: For now only handle relocations for the debug_info section. + // TODO: Add support for relocations in other sections as needed. + // Record relocations for the debug_info and debug_line sections. RelocAddrMap *Map; if (name == "debug_info") Map = &InfoRelocMap; else if (name == "debug_info.dwo") Map = &InfoDWORelocMap; + else if (name == "debug_line") + Map = &LineRelocMap; else continue; @@ -441,10 +444,17 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : reloc_i->getAddress(Address); uint64_t Type; reloc_i->getType(Type); + uint64_t SymAddr = 0; + // ELF relocations may need the symbol address + if (Obj->isELF()) { + object::SymbolRef Sym; + reloc_i->getSymbol(Sym); + Sym.getAddress(SymAddr); + } object::RelocVisitor V(Obj->getFileFormatName()); // The section address is always 0 for debug sections. 
- object::RelocToApply R(V.visit(Type, *reloc_i)); + object::RelocToApply R(V.visit(Type, *reloc_i, 0, SymAddr)); if (V.error()) { SmallString<32> Name; error_code ec(reloc_i->getTypeName(Name)); diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index 8a4a3af..d6314b8 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -95,6 +95,7 @@ public: virtual bool isLittleEndian() const = 0; virtual const RelocAddrMap &infoRelocMap() const = 0; + virtual const RelocAddrMap &lineRelocMap() const = 0; virtual StringRef getInfoSection() = 0; virtual StringRef getAbbrevSection() = 0; virtual StringRef getARangeSection() = 0; @@ -130,6 +131,7 @@ class DWARFContextInMemory : public DWARFContext { virtual void anchor(); bool IsLittleEndian; RelocAddrMap InfoRelocMap; + RelocAddrMap LineRelocMap; StringRef InfoSection; StringRef AbbrevSection; StringRef ARangeSection; @@ -150,6 +152,7 @@ public: DWARFContextInMemory(object::ObjectFile *); virtual bool isLittleEndian() const { return IsLittleEndian; } virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; } + virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; } virtual StringRef getInfoSection() { return InfoSection; } virtual StringRef getAbbrevSection() { return AbbrevSection; } virtual StringRef getARangeSection() { return ARangeSection; } diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 267364a..16ef896 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -155,7 +155,7 @@ DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data, if (pos.second) { // Parse and cache the line table for at this offset. State state; - if (!parseStatementTable(debug_line_data, &offset, state)) + if (!parseStatementTable(debug_line_data, RelocMap, &offset, state)) return 0; pos.first->second = state; } @@ -219,7 +219,8 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, } bool -DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, +DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, + const RelocAddrMap *RMap, uint32_t *offset_ptr, State &state) { const uint32_t debug_line_offset = *offset_ptr; @@ -268,7 +269,15 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - state.Address = debug_line_data.getAddress(offset_ptr); + { + // If this address is in our relocation map, apply the relocation. 
+        RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr);
+        if (AI != RMap->end()) {
+          const std::pair &R = AI->second;
+          state.Address = debug_line_data.getAddress(offset_ptr) + R.second;
+        } else
+          state.Address = debug_line_data.getAddress(offset_ptr);
+      }
       break;

     case DW_LNE_define_file:
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index 586dd7e..dbaf91d 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
 #define LLVM_DEBUGINFO_DWARFDEBUGLINE_H

+#include "DWARFRelocMap.h"
 #include "llvm/Support/DataExtractor.h"
 #include 
 #include 
@@ -21,6 +22,7 @@ class raw_ostream;

 class DWARFDebugLine {
 public:
+  DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {}
   struct FileNameEntry {
     FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {}

@@ -227,6 +229,7 @@ public:
                             Prologue *prologue);
   /// Parse a single line table (prologue and all rows).
   static bool parseStatementTable(DataExtractor debug_line_data,
+                                  const RelocAddrMap *RMap,
                                   uint32_t *offset_ptr, State &state);

   const LineTable *getLineTable(uint32_t offset) const;
@@ -238,6 +241,7 @@ private:
   typedef LineTableMapTy::iterator LineTableIter;
   typedef LineTableMapTy::const_iterator LineTableConstIter;

+  const RelocAddrMap *RelocMap;
   LineTableMapTy LineTableMap;
 };

diff --git a/lib/DebugInfo/DWARFRelocMap.h b/lib/DebugInfo/DWARFRelocMap.h
new file mode 100644
index 0000000..f53d05c
--- /dev/null
+++ b/lib/DebugInfo/DWARFRelocMap.h
@@ -0,0 +1,21 @@
+//===-- DWARFRelocMap.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#define LLVM_DEBUGINFO_DWARFRELOCMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+typedef DenseMap > RelocAddrMap;
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H
\ No newline at end of file
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 28b4f0f..89350cc 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -58,6 +58,8 @@ public:

   virtual StringRef getData() const { return ObjFile->getData(); }

+  virtual object::ObjectFile* getObjectFile() const { return ObjFile; }
+
   // Subclasses can override these methods to update the image with loaded
   // addresses for sections and common symbols
   virtual void updateSectionAddress(const object::SectionRef &Sec,
-- 
cgit v1.1


From f777d09f375c1206cd0cea649bd0b2c04d668bfa Mon Sep 17 00:00:00 2001
From: Nadav Rotem 
Date: Fri, 25 Jan 2013 22:57:05 +0000
Subject: Fix a warning in the new DWARF header. Add a new line at the end of
 the file.
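
For reference, the RelocAddrMap introduced in the preceding patch is keyed by
section offset, and consumers probe it before reading an address, adding the
recorded delta, which is the same shape as the DW_LNE_set_address handling
above. A self-contained sketch using std::map in place of DenseMap; the
uint64_t key and (width, delta) pair value are assumptions reconstructed from
how the map is used, since the typedef's template arguments are missing from
this listing:

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>

typedef std::map<uint64_t, std::pair<uint8_t, int64_t> > ToyRelocAddrMap;

// Probe the map at the current section offset; if a relocation was recorded
// there, add its delta to the raw address, otherwise use the raw address.
static uint64_t readAddress(const ToyRelocAddrMap &RMap, uint64_t Offset,
                            uint64_t RawAddress) {
  ToyRelocAddrMap::const_iterator AI = RMap.find(Offset);
  if (AI != RMap.end())
    return RawAddress + AI->second.second;
  return RawAddress;
}

int main() {
  ToyRelocAddrMap RMap;
  RMap[0x20] = std::make_pair((uint8_t)8, (int64_t)0x1000);
  std::printf("relocated: 0x%llx\n",
              (unsigned long long)readAddress(RMap, 0x20, 0x40));
  std::printf("untouched: 0x%llx\n",
              (unsigned long long)readAddress(RMap, 0x28, 0x40));
  return 0;
}
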
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFRelocMap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFRelocMap.h b/lib/DebugInfo/DWARFRelocMap.h index f53d05c..6929e36 100644 --- a/lib/DebugInfo/DWARFRelocMap.h +++ b/lib/DebugInfo/DWARFRelocMap.h @@ -18,4 +18,5 @@ typedef DenseMap > RelocAddrMap; } // namespace llvm -#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H \ No newline at end of file +#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H + -- cgit v1.1 From a8b289b70d5ef416608bb71a874b8b4fe80158e1 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 25 Jan 2013 23:05:59 +0000 Subject: Initial implementation of PPCTargetTransformInfo This provides a place to add customized operation cost information and control some other target-specific IR-level transformations. The only non-trivial logic in this checkin assigns a higher cost to unaligned loads and stores (covered by the included test case). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173520 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/CMakeLists.txt | 1 + lib/Target/PowerPC/PPC.h | 4 + lib/Target/PowerPC/PPCTargetMachine.cpp | 9 ++ lib/Target/PowerPC/PPCTargetMachine.h | 3 + lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 220 ++++++++++++++++++++++++++ 5 files changed, 237 insertions(+) create mode 100644 lib/Target/PowerPC/PPCTargetTransformInfo.cpp (limited to 'lib') diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 192d18d..6036428 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index e6d38eb..72e0ce3 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -25,6 +25,7 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; + class ImmutablePass; class JITCodeEmitter; class MachineInstr; class AsmPrinter; @@ -37,6 +38,9 @@ namespace llvm { JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); + + /// \brief Creates an PPC-specific Target Transformation Info pass. + ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); namespace PPCII { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b8b7882..fe851c1 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -126,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } + +void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our PPC pass. This + // allows the PPC pass to delegate to the target independent layer when + // appropriate. 
+  PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+  PM.add(createPPCTargetTransformInfoPass(this));
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index d917d99..606ccb3 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -68,6 +68,9 @@ public:
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);

   virtual bool addCodeEmitter(PassManagerBase &PM,
                               JITCodeEmitter &JCE);
+
+  /// \brief Register PPC analysis passes with a pass manager.
+  virtual void addAnalysisPasses(PassManagerBase &PM);
 };

 /// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 0000000..1afef33
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,220 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+  const PPCTargetMachine *TM;
+  const PPCSubtarget *ST;
+  const PPCTargetLowering *TLI;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+  PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  PPCTTI(const PPCTargetMachine *TM)
+      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+        TLI(TM->getTargetLowering()) {
+    initializePPCTTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void initializePass() {
+    pushTTIStack(this);
+  }
+
+  virtual void finalizePass() {
+    popTTIStack();
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", + "PPC Target Transform Info", true, true, false) +char PPCTTI::ID = 0; + +ImmutablePass * +llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { + return new PPCTTI(TM); +} + + +//===----------------------------------------------------------------------===// +// +// PPC cost model. +// +//===----------------------------------------------------------------------===// + +PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + // FIXME: PPC currently does not have custom popcnt lowering even though + // there is hardware support. Once this is fixed, update this function + // to reflect the real capabilities of the hardware. + return PSK_Software; +} + +unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasAltivec()) + return 0; + return 32; +} + +unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAltivec()) return 128; + return 0; + } + + if (ST->isPPC64()) + return 64; + return 32; + +} + +unsigned PPCTTI::getMaximumUnrollFactor() const { + unsigned Directive = ST->getDarwinDirective(); + // The 440 has no SIMD support, but floating-point instructions + // have a 5-cycle latency, so unroll by 5x for latency hiding. + if (Directive == PPC::DIR_440) + return 5; + + // The A2 has no SIMD support, but floating-point instructions + // have a 6-cycle latency, so unroll by 6x for latency hiding. + if (Directive == PPC::DIR_A2) + return 6; + + // FIXME: For lack of any better information, do no harm... + if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) + return 1; + + // For most things, modern systems have two execution units (and + // out-of-order execution). + return 2; +} + +unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Fallback to the default implementation. 
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); +} + +unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Src); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + // Each load/store unit costs 1. + unsigned Cost = LT.first * 1; + + // PPC in general does not support unaligned loads and stores. They'll need + // to be decomposed based on the alignment factor. + unsigned SrcBytes = LT.second.getStoreSize(); + if (SrcBytes && Alignment && Alignment < SrcBytes) + Cost *= (SrcBytes/Alignment); + + return Cost; +} + -- cgit v1.1 From 8e47daf2858e980210f3e1f007036b24da342c29 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 25 Jan 2013 23:09:36 +0000 Subject: Remove some introspection functions. The 'getSlot' function and its ilk allow introspection into the AttributeSet class. However, that class should be opaque. Allow access through accessor methods instead. 
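
The GlobalOpt change below is representative of the new pattern: callers walk
getNumSlots()/getSlotIndex()/getSlotAttributes() rather than pulling an
AttributeWithIndex out of getSlot(i). A toy model of that opaque-accessor
style; ToyAttrSet and slotHasNest are invented names, and the real
AttributeSet wraps a uniqued AttributeSetImpl rather than a plain vector:

#include <cstdio>
#include <utility>
#include <vector>

struct ToyAttrSet {
  // (parameter index, has-nest) pairs standing in for per-index attributes.
  std::vector<std::pair<unsigned, bool> > Slots;

  unsigned getNumSlots() const { return (unsigned)Slots.size(); }
  unsigned getSlotIndex(unsigned Slot) const { return Slots[Slot].first; }
  bool slotHasNest(unsigned Slot) const { return Slots[Slot].second; }
};

// Same loop shape as StripNest below: locate the slot through accessors
// only, never through the underlying slot record.
static int findNestIndex(const ToyAttrSet &Attrs) {
  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i)
    if (Attrs.slotHasNest(i))
      return (int)Attrs.getSlotIndex(i); // there can be only one
  return -1;
}

int main() {
  ToyAttrSet Attrs;
  Attrs.Slots.push_back(std::make_pair(1u, false));
  Attrs.Slots.push_back(std::make_pair(2u, true));
  std::printf("nest attribute at index %d\n", findNestIndex(Attrs));
  return 0;
}
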
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173522 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 8 +- lib/IR/AttributeImpl.h | 6 ++ lib/IR/Attributes.cpp | 138 ++++++++++++++---------- lib/IR/Verifier.cpp | 4 +- lib/Target/CppBackend/CPPBackend.cpp | 2 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 18 ++-- lib/Transforms/IPO/GlobalOpt.cpp | 5 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 14 +-- 9 files changed, 117 insertions(+), 80 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index f09b93b..4190161 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -465,7 +465,7 @@ bool BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Attribute ReconstitutedAttr = - Attribute::decodeLLVMAttributesForBitcode(Context, Record[i+1]); + AttributeFuncs::decodeLLVMAttributesForBitcode(Context, Record[i+1]); Record[i+1] = ReconstitutedAttr.Raw(); } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index cc6b8b3..b6c2bc0 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -173,9 +173,11 @@ static void WriteAttributeTable(const ValueEnumerator &VE, for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { const AttributeSet &A = Attrs[i]; for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { - const AttributeWithIndex &PAWI = A.getSlot(i); - Record.push_back(A.getSlotIndex(i)); - Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs)); + unsigned Index = A.getSlotIndex(i); + Record.push_back(Index); + Record.push_back(AttributeFuncs:: + encodeLLVMAttributesForBitcode(A.getSlotAttributes(i), + Index)); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index ebd90e2..d7ebec5 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -104,6 +104,8 @@ public: /// \brief This class represents a set of attributes that apply to the function, /// return type, and parameters. class AttributeSetImpl : public FoldingSetNode { + friend class AttributeSet; + LLVMContext &Context; SmallVector AttrList; @@ -126,6 +128,10 @@ public: // FIXME: This needs to use AttrNodes instead. return AttrList[Slot].Index; } + AttributeSet getSlotAttributes(unsigned Slot) const { + // FIXME: This needs to use AttrNodes instead. + return AttributeSet::get(Context, AttrList[Slot]); + } void Profile(FoldingSetNodeID &ID) const { Profile(ID, AttrList); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 538d9fe..f44a0fc 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -103,63 +103,6 @@ uint64_t Attribute::Raw() const { return pImpl ? pImpl->Raw() : 0; } -Attribute Attribute::typeIncompatible(Type *Ty) { - AttrBuilder Incompatible; - - if (!Ty->isIntegerTy()) - // Attribute that only apply to integers. - Incompatible.addAttribute(Attribute::SExt) - .addAttribute(Attribute::ZExt); - - if (!Ty->isPointerTy()) - // Attribute that only apply to pointers. 
- Incompatible.addAttribute(Attribute::ByVal) - .addAttribute(Attribute::Nest) - .addAttribute(Attribute::NoAlias) - .addAttribute(Attribute::NoCapture) - .addAttribute(Attribute::StructRet); - - return Attribute::get(Ty->getContext(), Incompatible); -} - -/// encodeLLVMAttributesForBitcode - This returns an integer containing an -/// encoding of all the LLVM attributes found in the given attribute bitset. -/// Any change to this encoding is a breaking change to bitcode compatibility. -uint64_t Attribute::encodeLLVMAttributesForBitcode(Attribute Attrs) { - // FIXME: It doesn't make sense to store the alignment information as an - // expanded out value, we should store it as a log2 value. However, we can't - // just change that here without breaking bitcode compatibility. If this ever - // becomes a problem in practice, we should introduce new tag numbers in the - // bitcode file and have those tags use a more efficiently encoded alignment - // field. - - // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit - // log2 encoded value. Shift the bits above the alignment up by 11 bits. - uint64_t EncodedAttrs = Attrs.Raw() & 0xffff; - if (Attrs.hasAttribute(Attribute::Alignment)) - EncodedAttrs |= Attrs.getAlignment() << 16; - EncodedAttrs |= (Attrs.Raw() & (0xffffULL << 21)) << 11; - return EncodedAttrs; -} - -/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing -/// the LLVM attributes that have been decoded from the given integer. This -/// function must stay in sync with 'encodeLLVMAttributesForBitcode'. -Attribute Attribute::decodeLLVMAttributesForBitcode(LLVMContext &C, - uint64_t EncodedAttrs) { - // The alignment is stored as a 16-bit raw value from bits 31--16. We shift - // the bits above 31 down by 11 bits. - unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; - assert((!Alignment || isPowerOf2_32(Alignment)) && - "Alignment must be a power of two."); - - AttrBuilder B(EncodedAttrs & 0xffff); - if (Alignment) - B.addAlignmentAttr(Alignment); - B.addRawValue((EncodedAttrs & (0xffffULL << 32)) >> 11); - return Attribute::get(C, B); -} - std::string Attribute::getAsString() const { std::string Result; if (hasAttribute(Attribute::ZExt)) @@ -666,6 +609,18 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, return get(C, AttributeWithIndex::get(Idx, Attribute::get(C, Kind))); } +AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { + SmallVector AttrList; + for (ArrayRef::iterator I = Attrs.begin(), E = Attrs.end(); + I != E; ++I) { + AttributeSet AS = *I; + if (!AS.AttrList) continue; + AttrList.append(AS.AttrList->AttrList.begin(), AS.AttrList->AttrList.end()); + } + + return get(C, AttrList); +} + //===----------------------------------------------------------------------===// // AttributeSet Method Implementations //===----------------------------------------------------------------------===// @@ -688,6 +643,12 @@ unsigned AttributeSet::getSlotIndex(unsigned Slot) const { return AttrList->getSlotIndex(Slot); } +AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { + assert(AttrList && Slot < AttrList->getNumAttributes() && + "Slot # out of range!"); + return AttrList->getSlotAttributes(Slot); +} + /// getSlot - Return the AttributeWithIndex at the specified slot. This /// holds a number plus a set of attributes. 
const AttributeWithIndex &AttributeSet::getSlot(unsigned Slot) const { @@ -859,3 +820,66 @@ void AttributeSet::dump() const { dbgs() << "]\n"; } + +//===----------------------------------------------------------------------===// +// AttributeFuncs Function Defintions +//===----------------------------------------------------------------------===// + +Attribute AttributeFuncs::typeIncompatible(Type *Ty) { + AttrBuilder Incompatible; + + if (!Ty->isIntegerTy()) + // Attribute that only apply to integers. + Incompatible.addAttribute(Attribute::SExt) + .addAttribute(Attribute::ZExt); + + if (!Ty->isPointerTy()) + // Attribute that only apply to pointers. + Incompatible.addAttribute(Attribute::ByVal) + .addAttribute(Attribute::Nest) + .addAttribute(Attribute::NoAlias) + .addAttribute(Attribute::NoCapture) + .addAttribute(Attribute::StructRet); + + return Attribute::get(Ty->getContext(), Incompatible); +} + +/// encodeLLVMAttributesForBitcode - This returns an integer containing an +/// encoding of all the LLVM attributes found in the given attribute bitset. +/// Any change to this encoding is a breaking change to bitcode compatibility. +uint64_t AttributeFuncs::encodeLLVMAttributesForBitcode(AttributeSet Attrs, + unsigned Index) { + // FIXME: It doesn't make sense to store the alignment information as an + // expanded out value, we should store it as a log2 value. However, we can't + // just change that here without breaking bitcode compatibility. If this ever + // becomes a problem in practice, we should introduce new tag numbers in the + // bitcode file and have those tags use a more efficiently encoded alignment + // field. + + // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit + // log2 encoded value. Shift the bits above the alignment up by 11 bits. + uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; + if (Attrs.hasAttribute(Index, Attribute::Alignment)) + EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; + EncodedAttrs |= (Attrs.Raw(Index) & (0xffffULL << 21)) << 11; + return EncodedAttrs; +} + +/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing +/// the LLVM attributes that have been decoded from the given integer. This +/// function must stay in sync with 'encodeLLVMAttributesForBitcode'. +Attribute AttributeFuncs::decodeLLVMAttributesForBitcode(LLVMContext &C, + uint64_t EncodedAttrs){ + // The alignment is stored as a 16-bit raw value from bits 31--16. We shift + // the bits above 31 down by 11 bits. + unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; + assert((!Alignment || isPowerOf2_32(Alignment)) && + "Alignment must be a power of two."); + + AttrBuilder B(EncodedAttrs & 0xffff); + if (Alignment) + B.addAlignmentAttr(Alignment); + B.addRawValue((EncodedAttrs & (0xffffULL << 32)) >> 11); + return Attribute::get(C, B); +} + diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 9a482f1..39f95fa 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -693,9 +693,9 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, "'noinline and alwaysinline' are incompatible!", V); Assert1(!AttrBuilder(Attrs, Idx). 
- hasAttributes(Attribute::typeIncompatible(Ty)), + hasAttributes(AttributeFuncs::typeIncompatible(Ty)), "Wrong types for attribute: " + - Attribute::typeIncompatible(Ty).getAsString(), V); + AttributeFuncs::typeIncompatible(Ty).getAsString(), V); if (PointerType *PTy = dyn_cast(Ty)) Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) || diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 2560c6e..1f4bdf8 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -474,7 +474,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL, Out << "AttributeWithIndex PAWI;"; nl(Out); for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { unsigned index = PAL.getSlotIndex(i); - AttrBuilder attrs(PAL.getSlot(i).Attrs); + AttrBuilder attrs(PAL.getSlotAttributes(i), index); Out << "PAWI.Index = " << index << "U;\n"; Out << " {\n AttrBuilder B;\n"; diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4603146..61b37d8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -273,13 +273,15 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Drop any attributes that were on the vararg arguments. AttributeSet PAL = CS.getAttributes(); if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) { - SmallVector AttributesVec; + SmallVector AttributesVec; for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) - AttributesVec.push_back(PAL.getSlot(i)); + AttributesVec.push_back(PAL.getSlotAttributes(i)); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(Fn.getContext(), - AttributeSet::FunctionIndex, - PAL.getFnAttributes())); + AttributesVec.push_back( + AttributeSet::get(Fn.getContext(), + AttributeWithIndex::get(Fn.getContext(), + AttributeSet::FunctionIndex, + PAL.getFnAttributes()))); PAL = AttributeSet::get(Fn.getContext(), AttributesVec); } @@ -765,10 +767,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RAttrs = AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex, AttrBuilder(RAttrs, AttributeSet::ReturnIndex). - removeAttributes(Attribute::typeIncompatible(NRetTy))); + removeAttributes(AttributeFuncs::typeIncompatible(NRetTy))); else assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). - hasAttributes(Attribute::typeIncompatible(NRetTy)) && + hasAttributes(AttributeFuncs::typeIncompatible(NRetTy)) && "Return attributes no longer compatible?"); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) @@ -846,7 +848,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RAttrs = AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex, AttrBuilder(RAttrs, AttributeSet::ReturnIndex). 
- removeAttributes(Attribute::typeIncompatible(NF->getReturnType()))); + removeAttributes(AttributeFuncs::typeIncompatible(NF->getReturnType()))); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeWithIndex::get(NF->getContext(), AttributeSet::ReturnIndex, diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 52d4e2f..c753e2a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2068,11 +2068,12 @@ static void ChangeCalleesToFastCall(Function *F) { static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - if (!Attrs.getSlot(i).Attrs.hasAttribute(Attribute::Nest)) + unsigned Index = Attrs.getSlotIndex(i); + if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest)) continue; // There can be only one. - return Attrs.removeAttribute(C, Attrs.getSlotIndex(i), Attribute::Nest); + return Attrs.removeAttribute(C, Index, Attribute::Nest); } return Attrs; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8555c2f..19eb965 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1015,7 +1015,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); - if (RAttrs.hasAttributes(Attribute::typeIncompatible(NewRetTy))) + if (RAttrs.hasAttributes(AttributeFuncs::typeIncompatible(NewRetTy))) return false; // Attribute not compatible with transformed value. } @@ -1045,7 +1045,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). - hasAttributes(Attribute::typeIncompatible(ParamTy))) + hasAttributes(AttributeFuncs::typeIncompatible(ParamTy))) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1101,11 +1101,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // won't be dropping them. Check that these extra arguments have attributes // that are compatible with being a vararg call argument. for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - if (CallerPAL.getSlotIndex(i - 1) <= FT->getNumParams()) + unsigned Index = CallerPAL.getSlotIndex(i - 1); + if (Index <= FT->getNumParams()) break; - Attribute PAttrs = CallerPAL.getSlot(i - 1).Attrs; + // Check if it has an attribute that's incompatible with varargs. - if (PAttrs.hasAttribute(Attribute::StructRet)) + AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1); + if (PAttrs.hasAttribute(Index, Attribute::StructRet)) return false; } @@ -1122,7 +1124,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs.removeAttributes(Attribute::typeIncompatible(NewRetTy)); + RAttrs.removeAttributes(AttributeFuncs::typeIncompatible(NewRetTy)); // Add the new return attributes. if (RAttrs.hasAttributes()) -- cgit v1.1 From 483bbd36b8b3f87073feea2e1096f188d0e735b8 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 25 Jan 2013 23:14:36 +0000 Subject: Remove dead method. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173524 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 8 -------- 1 file changed, 8 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index f44a0fc..45f8184 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -649,14 +649,6 @@ AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { return AttrList->getSlotAttributes(Slot); } -/// getSlot - Return the AttributeWithIndex at the specified slot. This -/// holds a number plus a set of attributes. -const AttributeWithIndex &AttributeSet::getSlot(unsigned Slot) const { - assert(AttrList && Slot < AttrList->getNumAttributes() && - "Slot # out of range!"); - return AttrList->getAttributes()[Slot]; -} - bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ return getAttributes(Index).hasAttribute(Kind); } -- cgit v1.1 From 395210d15b323aa620059fe362e0f8e5eacc0b05 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 25 Jan 2013 23:17:21 +0000 Subject: Remove unused variables, silences -Wunused-variable git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173526 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 1afef33..88b63e6 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -168,8 +168,7 @@ unsigned PPCTTI::getMaximumUnrollFactor() const { } unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); + assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); @@ -181,8 +180,7 @@ unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, } unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); + assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } -- cgit v1.1 From 32a57958226e369f964a034da2ce7083a1a34297 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 26 Jan 2013 00:03:11 +0000 Subject: Convert BuildLibCalls.cpp to using the AttributeSet methods instead of AttributeWithIndex. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 10 ++- lib/Transforms/Utils/BuildLibCalls.cpp | 132 ++++++++++++++++----------------- 2 files changed, 74 insertions(+), 68 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 45f8184..c033b5a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -605,8 +605,14 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { } AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, - Attribute::AttrKind Kind) { - return get(C, AttributeWithIndex::get(Idx, Attribute::get(C, Kind))); + ArrayRef Kind) { + // FIXME: This is temporary. Ultimately, the AttributeWithIndex will be + // replaced by an object that holds multiple Attribute::AttrKinds. 
+ AttrBuilder B; + for (ArrayRef::iterator I = Kind.begin(), + E = Kind.end(); I != E; ++I) + B.addAttribute(*I); + return get(C, Idx, B); } AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index bf540b0..6d13217 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -38,16 +38,16 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef(AVs, 2)); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", AttributeSet::get(M->getContext(), - AWI), + AS), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -67,16 +67,16 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef(AVs, 2)); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrNLen = M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), - AWI), + AS), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -98,15 +98,15 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AttributeWithIndex AWI = - AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef(AVs, 2)); + AttributeSet AS = + AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); Constant *StrChr = M->getOrInsertFunction("strchr", AttributeSet::get(M->getContext(), - AWI), + AS), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -123,17 +123,17 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef(AVs, 2)); + AS[2] = 
AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", AttributeSet::get(M->getContext(), - AWI), + AS), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -156,13 +156,13 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrCpy = M->getOrInsertFunction(Name, - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -180,14 +180,14 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, AttributeSet::get(M->getContext(), - AWI), + AS), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -207,12 +207,12 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS; + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -235,13 +235,13 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI; + AttributeSet AS; Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef(AVs, 2)); + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -263,16 +263,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = 
AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef(AVs, 2)); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -344,13 +344,13 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Value *PutS = M->getOrInsertFunction("puts", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -368,14 +368,14 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction("fputc", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -401,16 +401,16 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FPutsName, - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -434,17 +434,17 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = 
AttributeWithIndex::get(M->getContext(), 4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FWriteName, - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), -- cgit v1.1 From e27a787760ea7c2899da3287002fe8ba316df0d0 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Sat, 26 Jan 2013 00:28:05 +0000 Subject: Add DIContext::getLineInfoForAddressRange() function and test. This function allows a caller to obtain a table of line information for a function using the function's address and size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFContext.cpp | 58 +++++++++++++++++++++++++++++++ lib/DebugInfo/DWARFContext.h | 2 ++ lib/DebugInfo/DWARFDebugLine.cpp | 75 ++++++++++++++++++++++++++++++++++++++++ lib/DebugInfo/DWARFDebugLine.h | 4 +++ 4 files changed, 139 insertions(+) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 66d299b..768427f 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -325,6 +325,64 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, Line, Column); } +DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address, + uint64_t Size, + DILineInfoSpecifier Specifier) { + DILineInfoTable Lines; + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) + return Lines; + + std::string FunctionName = ""; + if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { + // The address may correspond to instruction in some inlined function, + // so we have to build the chain of inlined functions and take the + // name of the topmost function in it. + const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.size() > 0) { + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0]; + if (const char *Name = TopFunctionDIE.getSubroutineName(CU)) + FunctionName = Name; + } + } + + StringRef FuncNameRef = StringRef(FunctionName); + + // If the Specifier says we don't need FileLineInfo, just + // return the top-most function at the starting address. + if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + Lines.push_back(std::make_pair(Address, + DILineInfo(StringRef(""), + FuncNameRef, 0, 0))); + return Lines; + } + + const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); + const bool NeedsAbsoluteFilePath = + Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + + // Get the index of row we're looking for in the line table. + std::vector RowVector; + if (!LineTable->lookupAddressRange(Address, Size, RowVector)) + return Lines; + + uint32_t NumRows = RowVector.size(); + for (uint32_t i = 0; i < NumRows; ++i) { + uint32_t RowIndex = RowVector[i]; + // Take file number and line/column from the row. 
+ const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; + std::string FileName = ""; + getFileNameForCompileUnit(CU, LineTable, Row.File, + NeedsAbsoluteFilePath, FileName); + Lines.push_back(std::make_pair(Row.Address, + DILineInfo(StringRef(FileName), + FuncNameRef, Row.Line, Row.Column))); + } + + return Lines; +} + DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier) { DWARFCompileUnit *CU = getCompileUnitForAddress(Address); diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index d6314b8..9ff094b 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -90,6 +90,8 @@ public: virtual DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()); + virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address, + uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()); virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()); diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 16ef896..7b32d4f 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -525,6 +525,81 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { } bool +DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address, + uint64_t size, + std::vector& result) const { + if (Sequences.empty()) + return false; + uint64_t end_addr = address + size; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence sequence; + sequence.LowPC = address; + SequenceIter first_seq = Sequences.begin(); + SequenceIter last_seq = Sequences.end(); + SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence, + DWARFDebugLine::Sequence::orderByLowPC); + if (seq_pos == last_seq || seq_pos->LowPC != address) { + if (seq_pos == first_seq) + return false; + seq_pos--; + } + if (!seq_pos->containsPC(address)) + return false; + + SequenceIter start_pos = seq_pos; + + // Add the rows from the first sequence to the vector, starting with the + // index we just calculated + + while (seq_pos != last_seq && seq_pos->LowPC < end_addr) { + DWARFDebugLine::Sequence cur_seq = *seq_pos; + uint32_t first_row_index; + uint32_t last_row_index; + if (seq_pos == start_pos) { + // For the first sequence, we need to find which row in the sequence is the + // first in our range. Rows are stored in a vector, so we may use + // arithmetical operations with iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + cur_seq.LastRowIndex; + RowIter row_pos = std::upper_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + // The 'row_pos' iterator references the first row that is greater than + // our start address. Unless that's the first row, we want to start at + // the row before that. + first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row); + if (row_pos != first_row) + --first_row_index; + } else + first_row_index = cur_seq.FirstRowIndex; + + // For the last sequence in our range, we need to figure out the last row in + // range. For all other sequences we can go to the end of the sequence. 
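// (Editor's worked example with hypothetical addresses, not part of the
// patch: given one sequence with rows at 0x100, 0x110, 0x120, 0x130 and a
// query lookupAddressRange(0x118, 0x10, result), end_addr is 0x128. Above,
// upper_bound(0x118) lands on the 0x120 row and we step back one, so
// first_row_index names the 0x110 row; here, upper_bound(0x128) lands on
// the 0x130 row, so last_row_index names the 0x120 row, and the loop below
// pushes the indices of the 0x110 and 0x120 rows into 'result'.)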
+ if (cur_seq.HighPC > end_addr) { + DWARFDebugLine::Row row; + row.Address = end_addr; + RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + cur_seq.LastRowIndex; + RowIter row_pos = std::upper_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + // The 'row_pos' iterator references the first row that is greater than + // our end address. The row before that is the last row we want. + last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1; + } else + // Contrary to what you might expect, DWARFDebugLine::SequenceLastRowIndex + // isn't a valid index within the current sequence. It's that plus one. + last_row_index = cur_seq.LastRowIndex - 1; + + for (uint32_t i = first_row_index; i <= last_row_index; ++i) { + result.push_back(i); + } + + ++seq_pos; + } +} + +bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, bool NeedsAbsoluteFilePath, std::string &Result) const { diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index dbaf91d..2990756 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -178,6 +178,10 @@ public: // or -1 if there is no such row. uint32_t lookupAddress(uint64_t address) const; + bool lookupAddressRange(uint64_t address, + uint64_t size, + std::vector<uint32_t>& result) const; + // Extracts filename by its index in filename table in prologue. // Returns true on success. bool getFileNameByIndex(uint64_t FileIndex, -- cgit v1.1 From 8090a24bd6369662dfcd9d9df21b9c122238ed10 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 26 Jan 2013 01:45:06 +0000 Subject: DWARFDebugLine.cpp: Fix true path. Did you forget "return true" here? git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173552 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugLine.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 7b32d4f..192381c 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -597,6 +597,8 @@ DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address, ++seq_pos; } + + return true; } bool -- cgit v1.1 From d07c64dce67595e9b27e039c558800b7499df7c7 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 26 Jan 2013 06:58:35 +0000 Subject: fix use of std::set. it's an ordered set. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173563 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index f37b652f..0551bb4 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -206,14 +206,14 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } namespace { - struct eqstr { + struct ltstr { bool operator()(const char *s1, const char *s2) const { - return strcmp(s1, s2) == 0; + return strcmp(s1, s2) < 0; } }; - std::set<const char *, eqstr> noHelperNeeded; + std::set<const char *, ltstr> noHelperNeeded; const char* addToNoHelperNeeded(const char* s) { noHelperNeeded.insert(s); -- cgit v1.1 From 11f2bf7f15a7d3b3df500f3f3e76355c888c23c7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 26 Jan 2013 11:44:21 +0000 Subject: X86: Do splat promotion later, so the optimizer can chew on it first.
This catches many cases where we can emit a more efficient shuffle for a specific mask or when the mask contains undefs. Once the splat is lowered to unpacks we can't do that anymore. There is a possibility of moving the promotion after pshufb matching, but I'm not sure if pshufb with a mask loaded from memory is faster than 3 shuffles, so I avoided that for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0c12410..6daa9b6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5839,6 +5839,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, } } + // Promote splats to a larger type which usually leads to more efficient code. + // FIXME: Is this true if pshufb is available? + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. @@ -5972,6 +5977,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef MaskVals = SVOp->getMask(); + // Promote splats to a larger type which usually leads to more efficient code. + // FIXME: Is this true if pshufb is available? + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. @@ -6669,20 +6679,10 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // Handle splat operations if (SVOp->isSplat()) { - unsigned NumElem = VT.getVectorNumElements(); - // Use vbroadcast whenever the splat comes from a foldable load SDValue Broadcast = LowerVectorBroadcast(Op, DAG); if (Broadcast.getNode()) return Broadcast; - - // Handle splats by matching through known shuffle masks - if ((VT.is128BitVector() && NumElem <= 4) || - (VT.is256BitVector() && NumElem <= 8)) - return SDValue(); - - // All remaning splats are promoted to target supported vector shuffles. - return PromoteSplat(SVOp, DAG); } // Check integer expanding shuffles. -- cgit v1.1 From 200b306f2006533a0e7a0ca75cb3103620e7aa84 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 26 Jan 2013 13:31:37 +0000 Subject: X86: Decode PALIGN operands so I don't have to do it in my head. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 22 ++++++++++++++++++++++ lib/Target/X86/Utils/X86ShuffleDecode.cpp | 8 ++++++++ lib/Target/X86/Utils/X86ShuffleDecode.h | 2 ++ lib/Target/X86/X86ISelLowering.cpp | 5 ++++- 4 files changed, 36 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 64ac5e6..d8a45ea 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -69,6 +69,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::PALIGNR128rr: + case X86::VPALIGNR128rr: + Src1Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PALIGNR128rm: + case X86::VPALIGNR128rm: + Src2Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePALIGNMask(MVT::v16i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPALIGNR256rr: + Src1Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPALIGNR256rm: + Src2Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePALIGNMask(MVT::v32i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + case X86::PSHUFDri: case X86::VPSHUFDri: Src1Name = getRegName(MI->getOperand(1).getReg()); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 8b87c1f..9694808 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -61,6 +61,14 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { ShuffleMask.push_back(NElts+i); } +void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); + + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back((i + Offset) % (NumElts * 2)); +} + /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
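A concrete decode, as an editor's illustration (the instruction and immediate are hypothetical; the arithmetic follows the function added above):

// For a 128-bit palignr with immediate 5 on v16i8: Offset = 5 * (8 / 8) = 5,
// so each mask entry is (i + 5) % 32 for i = 0..15, i.e.
//   <5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20>
// where indices 0..15 name lanes of the first shuffle operand and 16..31
// name lanes of the second.
SmallVector<int, 16> Mask;
DecodePALIGNMask(MVT::v16i8, 5, Mask);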
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 70d8171..69ce443 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -35,6 +35,8 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); +void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); + void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6daa9b6..76ec12c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4592,6 +4592,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLHPS: DecodeMOVLHPSMask(NumElems, Mask); break; + case X86ISD::PALIGN: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePALIGNMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + break; case X86ISD::PSHUFD: case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); @@ -4635,7 +4639,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLPS: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: - case X86ISD::PALIGN: // Not yet implemented return false; default: llvm_unreachable("unknown target shuffle node"); -- cgit v1.1 From 7a8aba906416b6998347b52c3c08610fdc190638 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 26 Jan 2013 16:49:03 +0000 Subject: BBVectorize: Fix anomalous capital letter in comment git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173579 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index d72a4a1..411d7f6 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1367,7 +1367,7 @@ namespace { IsInPair.insert(C->second); } - // Iterate through the basic block, recording all Users of each + // Iterate through the basic block, recording all users of each // pairable instruction. BasicBlock::iterator E = BB.end(); -- cgit v1.1 From ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 26 Jan 2013 16:49:04 +0000 Subject: BBVectorize: Add an additional comment about the cost computation git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173580 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 411d7f6..4bd11c6 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -994,6 +994,12 @@ namespace { unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); Type *VT1 = getVecTypeForPair(IT1, JT1), *VT2 = getVecTypeForPair(IT2, JT2); + + // Note that this procedure is incorrect for insert and extract element + // instructions (because combining these often results in a shuffle), + // but this cost is ignored (because insert and extract element + // instructions are assigned a zero depth factor and are not really + // fused in general).
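// (Editor's note illustrating the comment above, under the usual lowering
// assumption: fusing a pair of extractelement instructions, for example,
// typically produces a shufflevector rather than one wider extract, so the
// VCost comparison below against ICost + JCost would be misleading for such
// pairs; this is harmless only because those pairs get a zero depth factor
// and are not really fused.)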
unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); if (VCost > ICost + JCost) -- cgit v1.1 From 0c2f0ff9ccee3d711893b963b1dd8426beb7ddfe Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 00:36:48 +0000 Subject: Use the AttributeSet instead of AttributeWithIndex object. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173598 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4190161..2e1a512 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -437,7 +437,7 @@ bool BitcodeReader::ParseAttributeBlock() { SmallVector Record; - SmallVector Attrs; + SmallVector Attrs; // Read all the records. while (1) { @@ -472,8 +472,7 @@ bool BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B(Record[i+1]); if (B.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(Record[i], - Attribute::get(Context, B))); + Attrs.push_back(AttributeSet::get(Context, Record[i], B)); } MAttributes.push_back(AttributeSet::get(Context, Attrs)); -- cgit v1.1 From 9a131c544cc06c46a3c39ed0c3e6d4311998b5f1 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 01:22:51 +0000 Subject: Convert the CPP backend to use the AttributeSet instead of AttributeWithIndex. Further removal of the introspective AttributeWithIndex thing. Also fix the #includes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173599 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CppBackend/CPPBackend.cpp | 63 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 1f4bdf8..604abf9 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -470,18 +470,19 @@ void CppWriter::printAttributes(const AttributeSet &PAL, nl(Out); if (!PAL.isEmpty()) { Out << '{'; in(); nl(Out); - Out << "SmallVector Attrs;"; nl(Out); - Out << "AttributeWithIndex PAWI;"; nl(Out); + Out << "SmallVector Attrs;"; nl(Out); + Out << "AttributeSet PAS;"; in(); nl(Out); for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { unsigned index = PAL.getSlotIndex(i); AttrBuilder attrs(PAL.getSlotAttributes(i), index); - Out << "PAWI.Index = " << index << "U;\n"; - Out << " {\n AttrBuilder B;\n"; + Out << "{"; in(); nl(Out); + Out << "AttrBuilder B;"; nl(Out); -#define HANDLE_ATTR(X) \ - if (attrs.contains(Attribute::X)) \ - Out << " B.addAttribute(Attribute::" #X ");\n"; \ - attrs.removeAttribute(Attribute::X); +#define HANDLE_ATTR(X) \ + if (attrs.contains(Attribute::X)) { \ + Out << "B.addAttribute(Attribute::" #X ");"; nl(Out); \ + attrs.removeAttribute(Attribute::X); \ + } HANDLE_ATTR(SExt); HANDLE_ATTR(ZExt); @@ -510,14 +511,23 @@ void CppWriter::printAttributes(const AttributeSet &PAL, HANDLE_ATTR(NonLazyBind); HANDLE_ATTR(MinSize); #undef HANDLE_ATTR - if (attrs.contains(Attribute::StackAlignment)) - Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n"; - attrs.removeAttribute(Attribute::StackAlignment); + + if (attrs.contains(Attribute::StackAlignment)) { + Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment()<<')'; + nl(Out); + attrs.removeAttribute(Attribute::StackAlignment); + } + assert(!attrs.hasAttributes() && "Unhandled 
attribute!"); - Out << " PAWI.Attrs = Attribute::get(mod->getContext(), B);\n }"; - nl(Out); - Out << "Attrs.push_back(PAWI);"; + Out << "PAS = AttributeSet::get(mod->getContext(), "; + if (index == ~0U) + Out << "~0U,"; + else + Out << index << "U,"; + Out << " B);"; out(); nl(Out); + Out << "}"; out(); nl(Out); nl(Out); + Out << "Attrs.push_back(PAS);"; nl(Out); } Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);"; nl(Out); @@ -1889,23 +1899,24 @@ void CppWriter::printModuleBody() { void CppWriter::printProgram(const std::string& fname, const std::string& mName) { - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; - Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; + Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; + Out << "#include \n"; Out << "#include \n"; Out << "using namespace llvm;\n\n"; Out << "Module* " << fname << "();\n\n"; -- cgit v1.1 From d04b2d45d97312475867d9f20724701267738240 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 01:44:34 +0000 Subject: Use the AttributeSet instead of AttributeWithIndex. In the future, AttributeWithIndex won't be used anymore. Besides, it exposes the internals of the AttributeSet to outside users, which isn't goodness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173600 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/DeadArgumentElimination.cpp | 42 +++++++++----------------- 1 file changed, 15 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 61b37d8..e651fb8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -277,11 +277,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlotAttributes(i)); if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back( - AttributeSet::get(Fn.getContext(), - AttributeWithIndex::get(Fn.getContext(), - AttributeSet::FunctionIndex, - PAL.getFnAttributes()))); + AttributesVec.push_back(AttributeSet::get(Fn.getContext(), + PAL.getFnAttributes())); PAL = AttributeSet::get(Fn.getContext(), AttributesVec); } @@ -699,7 +696,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { std::vector Params; // Set up to build a new list of parameter attributes. - SmallVector AttributesVec; + SmallVector AttributesVec; const AttributeSet &PAL = F->getAttributes(); // Find out the new return value. @@ -774,9 +771,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { "Return attributes no longer compatible?"); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeWithIndex::get(NRetTy->getContext(), - AttributeSet::ReturnIndex, - RAttrs)); + AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs)); // Remember which arguments are still alive. 
SmallVector ArgAlive(FTy->getNumParams(), false); @@ -794,10 +789,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Get the original parameter attributes (skipping the first one, that is // for the return value. if (PAL.hasAttributes(i + 1)) { + AttrBuilder B(PAL, i + 1); AttributesVec. - push_back(AttributeWithIndex::get(F->getContext(), i + 1, - PAL.getParamAttributes(i + 1))); - AttributesVec.back().Index = Params.size(); + push_back(AttributeSet::get(F->getContext(), Params.size(), B)); } } else { ++NumArgumentsEliminated; @@ -807,9 +801,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), - AttributeSet::FunctionIndex, - PAL.getFnAttributes())); + AttributesVec.push_back(AttributeSet::get(F->getContext(), + PAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec); @@ -850,9 +843,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttrBuilder(RAttrs, AttributeSet::ReturnIndex). removeAttributes(AttributeFuncs::typeIncompatible(NF->getReturnType()))); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeWithIndex::get(NF->getContext(), - AttributeSet::ReturnIndex, - RAttrs)); + AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs)); // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. @@ -865,10 +856,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. if (CallPAL.hasAttributes(i + 1)) { + AttrBuilder B(CallPAL, i + 1); AttributesVec. - push_back(AttributeWithIndex::get(F->getContext(), i + 1, - CallPAL.getParamAttributes(i + 1))); - AttributesVec.back().Index = Args.size(); + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } @@ -876,17 +866,15 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); if (CallPAL.hasAttributes(i + 1)) { + AttrBuilder B(CallPAL, i + 1); AttributesVec. - push_back(AttributeWithIndex::get(F->getContext(), i + 1, - CallPAL.getParamAttributes(i + 1))); - AttributesVec.back().Index = Args.size(); + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(Call->getContext(), - AttributeSet::FunctionIndex, - CallPAL.getFnAttributes())); + AttributesVec.push_back(AttributeSet::get(Call->getContext(), + CallPAL.getFnAttributes())); // Reconstruct the AttributesList based on the vector we constructed. AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); -- cgit v1.1 From b2484b4332ffe385421e338de21372ea8a9dc5cf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 01:57:28 +0000 Subject: Use the AttributeSet instead of AttributeWithIndex. In the future, AttributeWithIndex won't be used anymore. Besides, it exposes the internals of the AttributeSet to outside users, which isn't goodness. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173601 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/ArgumentPromotion.cpp | 37 +++++++++++++------------------- 1 file changed, 15 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 627012f..e6fa4ed 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -514,14 +514,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost - SmallVector AttributesVec; + SmallVector AttributesVec; const AttributeSet &PAL = F->getAttributes(); // Add any return attributes. if (PAL.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), - AttributeSet::ReturnIndex, - PAL.getRetAttributes())); + AttributesVec.push_back(AttributeSet::get(F->getContext(), + PAL.getRetAttributes())); // First, determine the new argument list unsigned ArgIndex = 1; @@ -539,10 +538,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Params.push_back(I->getType()); AttributeSet attrs = PAL.getParamAttributes(ArgIndex); if (attrs.hasAttributes(ArgIndex)) { + AttrBuilder B(attrs, ArgIndex); AttributesVec. - push_back(AttributeWithIndex::get(F->getContext(), - ArgIndex, attrs)); - AttributesVec.back().Index = Params.size(); + push_back(AttributeSet::get(F->getContext(), Params.size(), B)); } } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) @@ -596,9 +594,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any function attributes. if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(FTy->getContext(), - AttributeSet::FunctionIndex, - PAL.getFnAttributes())); + AttributesVec.push_back(AttributeSet::get(FTy->getContext(), + PAL.getFnAttributes())); Type *RetTy = FTy->getReturnType(); @@ -644,9 +641,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Add any return attributes. if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeWithIndex::get(F->getContext(), - AttributeSet::ReturnIndex, - CallPAL.getRetAttributes())); + AttributesVec.push_back(AttributeSet::get(F->getContext(), + CallPAL.getRetAttributes())); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. @@ -658,10 +654,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Args.push_back(*AI); // Unmodified argument if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); AttributesVec. - push_back(AttributeWithIndex::get(F->getContext(), ArgIndex, - CallPAL.getParamAttributes(ArgIndex))); - AttributesVec.back().Index = Args.size(); + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. @@ -722,18 +717,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); AttributesVec. 
- push_back(AttributeWithIndex::get(F->getContext(), ArgIndex, - CallPAL.getParamAttributes(ArgIndex))); - AttributesVec.back().Index = Args.size(); + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } // Add any function attributes. if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeWithIndex::get(Call->getContext(), - AttributeSet::FunctionIndex, - CallPAL.getFnAttributes())); + AttributesVec.push_back(AttributeSet::get(Call->getContext(), + CallPAL.getFnAttributes())); Instruction *New; if (InvokeInst *II = dyn_cast(Call)) { -- cgit v1.1 From ac90696722bef3993e5fb9f72a6fa4209bbc8763 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 02:08:22 +0000 Subject: Use the AttributeSet instead of AttributeWithIndex. In the future, AttributeWithIndex won't be used anymore. Besides, it exposes the internals of the AttributeSet to outside users, which isn't goodness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 43 +++++++++++-------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 19eb965..f56dc95 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1116,7 +1116,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // inserting cast instructions as necessary. std::vector Args; Args.reserve(NumActualArgs); - SmallVector attrVec; + SmallVector attrVec; attrVec.reserve(NumCommonArgs); // Get any return attributes. @@ -1128,9 +1128,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Add the new return attributes. if (RAttrs.hasAttributes()) - attrVec.push_back( - AttributeWithIndex::get(AttributeSet::ReturnIndex, - Attribute::get(FT->getContext(), RAttrs))); + attrVec.push_back(AttributeSet::get(Caller->getContext(), + AttributeSet::ReturnIndex, RAttrs)); AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { @@ -1146,9 +1145,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Add any parameter attributes. AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); if (PAttrs.hasAttributes()) - attrVec.push_back( - AttributeWithIndex::get(i + 1, - Attribute::get(FT->getContext(), PAttrs))); + attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1, + PAttrs)); } // If the function takes more arguments than the call was taking, add them @@ -1175,18 +1173,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Add any parameter attributes. AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); if (PAttrs.hasAttributes()) - attrVec.push_back( - AttributeWithIndex::get(i + 1, - Attribute::get(FT->getContext(), PAttrs))); + attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1, + PAttrs)); } } } AttributeSet FnAttrs = CallerPAL.getFnAttributes(); if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex)) - attrVec.push_back(AttributeWithIndex::get(Callee->getContext(), - AttributeSet::FunctionIndex, - FnAttrs)); + attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs)); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. 
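The shape shared by these conversions, as a standalone editor's sketch (Ctx stands for a hypothetical LLVMContext; the attribute kind and slot index are arbitrary):

// Before: the slot index travelled inside an AttributeWithIndex.
// After: AttributeSet::get takes the index directly, and the per-slot
// sets are merged into the final attribute list at the end.
SmallVector<AttributeSet, 8> AttrVec;
AttrBuilder B;
B.addAttribute(Attribute::NoCapture);
AttrVec.push_back(AttributeSet::get(Ctx, 1, B));    // attributes of arg #1
AttributeSet PAL = AttributeSet::get(Ctx, AttrVec); // merged attribute list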
@@ -1287,7 +1282,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, std::vector NewArgs; NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); - SmallVector NewAttrs; + SmallVector NewAttrs; NewAttrs.reserve(Attrs.getNumSlots() + 1); // Insert the nest argument into the call argument list, which may @@ -1295,9 +1290,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add any result attributes. if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) - NewAttrs.push_back(AttributeWithIndex::get(Caller->getContext(), - AttributeSet::ReturnIndex, - Attrs.getRetAttributes())); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + Attrs.getRetAttributes())); { unsigned Idx = 1; @@ -1309,8 +1303,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, if (NestVal->getType() != NestTy) NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); - NewAttrs.push_back(AttributeWithIndex::get(Caller->getContext(), - NestIdx, NestAttr)); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + NestAttr)); } if (I == E) @@ -1320,9 +1314,9 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NewArgs.push_back(*I); AttributeSet Attr = Attrs.getParamAttributes(Idx); if (Attr.hasAttributes(Idx)) { - NewAttrs.push_back - (AttributeWithIndex::get(Caller->getContext(), Idx, Attr)); - NewAttrs.back().Index = Idx + (Idx >= NestIdx); + AttrBuilder B(Attr, Idx); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + Idx + (Idx >= NestIdx), B)); } ++Idx, ++I; @@ -1331,9 +1325,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add any function attributes. if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) - NewAttrs.push_back(AttributeWithIndex::get(FTy->getContext(), - AttributeSet::FunctionIndex, - Attrs.getFnAttributes())); + NewAttrs.push_back(AttributeSet::get(FTy->getContext(), + Attrs.getFnAttributes())); // The trampoline may have been bitcast to a bogus type (FTy). // Handle this by synthesizing a new function type, equal to FTy -- cgit v1.1 From a1683d6c4835c953bbab12f54b70b9a75cfe01f4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 02:24:02 +0000 Subject: Use the AttributeSet instead of AttributeWithIndex. In the future, AttributeWithIndex won't be used anymore. Besides, it exposes the internals of the AttributeSet to outside users, which isn't goodness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173603 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 66 +++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 0eb6023..a38f9ea 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2813,25 +2813,25 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { // Okay, if we got here, the function is syntactically valid. Convert types // and do semantic checks. 
std::vector ParamTypeList; - SmallVector Attrs; + SmallVector Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttributeSet::ReturnIndex, - Attribute::get(RetType->getContext(), - RetAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::ReturnIndex, + RetAttrs)); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { ParamTypeList.push_back(ArgList[i].Ty); - if (ArgList[i].Attrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + if (ArgList[i].Attrs.hasAttributes()) { + AttrBuilder B(ArgList[i].Attrs); + Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); + } } if (FuncAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttributeSet::FunctionIndex, - Attribute::get(RetType->getContext(), - FuncAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::FunctionIndex, + FuncAttrs)); AttributeSet PAL = AttributeSet::get(Context, Attrs); @@ -3358,12 +3358,11 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; // Set up the Attribute for the function. - SmallVector Attrs; + SmallVector Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttributeSet::ReturnIndex, - Attribute::get(Callee->getContext(), - RetAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::ReturnIndex, + RetAttrs)); SmallVector Args; @@ -3383,18 +3382,19 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + if (ArgList[i].Attrs.hasAttributes()) { + AttrBuilder B(ArgList[i].Attrs); + Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); + } } if (I != E) return Error(CallLoc, "not enough parameters specified for call"); if (FnAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttributeSet::FunctionIndex, - Attribute::get(Callee->getContext(), - FnAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::FunctionIndex, + FnAttrs)); // Finish off the Attribute and check them AttributeSet PAL = AttributeSet::get(Context, Attrs); @@ -3760,12 +3760,11 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; // Set up the Attribute for the function. 
- SmallVector Attrs; + SmallVector Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttributeSet::ReturnIndex, - Attribute::get(Callee->getContext(), - RetAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::ReturnIndex, + RetAttrs)); SmallVector Args; @@ -3785,18 +3784,19 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + if (ArgList[i].Attrs.hasAttributes()) { + AttrBuilder B(ArgList[i].Attrs); + Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); + } } if (I != E) return Error(CallLoc, "not enough parameters specified for call"); if (FnAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttributeSet::FunctionIndex, - Attribute::get(Callee->getContext(), - FnAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::FunctionIndex, + FnAttrs)); // Finish off the Attribute and check them AttributeSet PAL = AttributeSet::get(Context, Attrs); -- cgit v1.1 From 6de90c0820db87335ac14d42d13f75e1ee4bb417 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sun, 27 Jan 2013 06:19:48 +0000 Subject: Renamed function IsPotentialUse to IsPotentialRetainableObjPtr. This name change does the following: 1. Causes the function name to use proper ARC terminology. 2. Makes it clear what the function truly does. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173609 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ObjCARC.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 1c7acb0..0dab0ff 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -223,26 +223,26 @@ namespace { } } -/// \brief Test whether the given value is possible a reference-counted pointer. -static bool IsPotentialUse(const Value *Op) { - // Pointers to static or stack storage are not reference-counted pointers. +/// \brief Test whether the given value is possible a retainable object pointer. +static bool IsPotentialRetainableObjPtr(const Value *Op) { + // Pointers to static or stack storage are not valid retainable object pointers. if (isa(Op) || isa(Op)) return false; - // Special arguments are not reference-counted. + // Special arguments can not be a valid retainable object pointer. if (const Argument *Arg = dyn_cast(Op)) if (Arg->hasByValAttr() || Arg->hasNestAttr() || Arg->hasStructRetAttr()) return false; // Only consider values with pointer types. + // // It seemes intuitive to exclude function pointer types as well, since - // functions are never reference-counted, however clang occasionally - // bitcasts reference-counted pointers to function-pointer type - // temporarily. + // functions are never retainable object pointers, however clang occasionally + // bitcasts retainable object pointers to function-pointer type temporarily. PointerType *Ty = dyn_cast(Op->getType()); if (!Ty) return false; - // Conservatively assume anything else is a potential use. + // Conservatively assume anything else is a potential retainable object pointer. 
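// (Editor's illustration of what falls through to this point: constants,
// globals, allocas, byval/nest/sret arguments, and non-pointer values were
// all rejected above, so e.g. an i8* loaded from memory or returned by an
// arbitrary call reaches here and is conservatively treated as a potential
// retainable object pointer.)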
return true; } @@ -251,7 +251,7 @@ static bool IsPotentialUse(const Value *Op) { static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) - if (IsPotentialUse(*I)) + if (IsPotentialRetainableObjPtr(*I)) return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; return CS.onlyReadsMemory() ? IC_None : IC_Call; @@ -400,7 +400,7 @@ static InstructionClass GetInstructionClass(const Value *V) { // Comparing a pointer with null, or any other constant, isn't an // interesting use, because we don't care what the pointer points to, or // about the values of any other dynamic reference-counted pointers. - if (IsPotentialUse(I->getOperand(1))) + if (IsPotentialRetainableObjPtr(I->getOperand(1))) return IC_User; break; default: @@ -411,7 +411,7 @@ static InstructionClass GetInstructionClass(const Value *V) { // it, so we have to consider it potentially used. for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) - if (IsPotentialUse(*OI)) + if (IsPotentialRetainableObjPtr(*OI)) return IC_User; } } @@ -2023,9 +2023,9 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { /// Test whether the given value is possible a reference-counted pointer, /// including tests which utilize AliasAnalysis. -static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { +static bool IsPotentialRetainableObjPtr(const Value *Op, AliasAnalysis &AA) { // First make the rudimentary check. - if (!IsPotentialUse(Op)) + if (!IsPotentialRetainableObjPtr(Op)) return false; // Objects in constant memory are not reference-counted. @@ -2066,7 +2066,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -2091,14 +2091,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, // Comparing a pointer with null, or any other constant, isn't really a use, // because we don't care what the pointer points to, or about the values // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) + if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) return false; } else if (ImmutableCallSite CS = static_cast(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -2108,14 +2108,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); + return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); } // Check each operand for a match. 
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; -- cgit v1.1 From 455151e4f7b6dc264183ecc892a2f9678ff5fe46 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 27 Jan 2013 06:42:03 +0000 Subject: Re-revert r173342, without losing the compile time improvements, flat out bug fixes, or functionality preserving refactorings. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173610 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 39 +++++++++++------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3cf3984..a63d31d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1369,8 +1369,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const TargetTransformInfo &TTI) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -1406,24 +1405,15 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Only speculatively execution a single instruction (not counting the // terminator) for now. - SpeculationCost += TTI.getUserCost(I); - if (SpeculationCost > TargetTransformInfo::TCC_Basic) + ++SpeculationCost; + if (SpeculationCost > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. if (!isSafeToSpeculativelyExecute(I)) return false; - // FIXME: These should really be cost metrics, but our cost model doesn't - // accurately model the expense of selects and floating point operations. - // FIXME: Is it really safe to speculate floating point operations? - // Signaling NaNs break with this, but we shouldn't care, right? - if (isa(I) || I->getType()->isFPOrFPVectorTy()) + if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; - // FIXME: The cost metric currently doesn't reason accurately about simple - // versus complex GEPs, take a conservative approach here. - if (GEPOperator *GEP = dyn_cast(I)) - if (!GEP->hasAllConstantIndices()) - return false; // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from @@ -1446,8 +1436,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end(); I != E; ++I) if (I->first->getNumUses() == I->second) { - SpeculationCost += TTI.getUserCost(I->first); - if (SpeculationCost > TargetTransformInfo::TCC_Basic) + ++SpeculationCost; + if (SpeculationCost > 1) return false; } @@ -1469,20 +1459,15 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!isSafeToSpeculativelyExecute(CE)) return false; - - // Don't speculate into a select with a constant select expression operand. - // FIXME: This should really be a cost metric, but our cost model doesn't - // accurately model the expense of select. 
- if (Operator::getOpcode(CE) == Instruction::Select) + if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) return false; // Account for the cost of an unfolded ConstantExpr which could end up // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the - // constant expression. The cost functions in TTI don't yet correctly model - // constant expression costs. - SpeculationCost += TargetTransformInfo::TCC_Basic; - if (SpeculationCost > TargetTransformInfo::TCC_Basic) + // constant expression. + ++SpeculationCost; + if (SpeculationCost > 1) return false; } @@ -3899,7 +3884,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) return SimplifyCFG(BB, TTI, TD) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { @@ -3908,7 +3893,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB, TTI, TD) | true; } -- cgit v1.1 From 09dda440ba6622b64713ef5f7d9531411e3589ea Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 09:55:44 +0000 Subject: Remove dead methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index c033b5a..937514a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -30,11 +30,8 @@ using namespace llvm; // Attribute Implementation //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, ArrayRef Vals) { - AttrBuilder B; - for (ArrayRef::iterator I = Vals.begin(), E = Vals.end(); - I != E; ++I) - B.addAttribute(*I); +Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { + AttrBuilder B(Kind); return Attribute::get(Context, B); } @@ -487,17 +484,6 @@ void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, } //===----------------------------------------------------------------------===// -// AttributeWithIndex Definition -//===----------------------------------------------------------------------===// - -AttributeWithIndex AttributeWithIndex::get(LLVMContext &C, unsigned Idx, - AttributeSet AS) { - // FIXME: This is temporary, but necessary for the conversion. - AttrBuilder B(AS, Idx); - return get(Idx, Attribute::get(C, B)); -} - -//===----------------------------------------------------------------------===// // AttributeSetNode Definition //===----------------------------------------------------------------------===// -- cgit v1.1 From 16f95669ec814d98ce28ad514df603c01d662ee8 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 10:28:39 +0000 Subject: Fix miscompile. Add back the use of the ArrayRef version of the ::get method. 
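For illustration only, a minimal sketch of a call site, assuming the restored
overload Attribute::get(LLVMContext &, ArrayRef<Attribute::AttrKind>) shown in
the diff below; the helper name here is hypothetical:

    // Several kinds fold into a single Attribute in one call, instead of
    // hand-building an AttrBuilder at every call site.
    static Attribute makeNoUnwindReadOnly(LLVMContext &Ctx) {
      Attribute::AttrKind Kinds[] = { Attribute::NoUnwind, Attribute::ReadOnly };
      return Attribute::get(Ctx, Kinds); // C array converts to ArrayRef
    }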
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173613 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 937514a..8ec192b 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -30,8 +30,11 @@ using namespace llvm; // Attribute Implementation //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { - AttrBuilder B(Kind); +Attribute Attribute::get(LLVMContext &Context, ArrayRef Kinds) { + AttrBuilder B; + for (ArrayRef::iterator I = Kinds.begin(), E = Kinds.end(); + I != E; ++I) + B.addAttribute(*I); return Attribute::get(Context, B); } -- cgit v1.1 From 914f8c4825140a97219e94b815beb2bc77395d10 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 27 Jan 2013 11:19:11 +0000 Subject: When the legalizer is splitting vector shifts, the result may not have the right shift amount type. Fix that by adding a cast to the shift expander. This came up with vector shifts on sse-less X86 CPUs. <2 x i64> = shl <2 x i64> <2 x i64> -> i64,i64 = shl i64 i64; shl i64 i64 -> i32,i32,i32,i32 = shl_parts i32 i32 i64; shl_parts i32 i32 i64 Now we cast the last two i64s to the right type. Fixes the crash in PR14668. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173615 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5da6d29..e08dd6d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2095,9 +2095,16 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Expand the subcomponents. SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); - - SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; EVT VT = LHSL.getValueType(); + + // If the shift amount operand is coming from a vector legalization it may + // not have the right return type. Fix that first by casting the operand. + SDValue ShiftOp = N->getOperand(1); + MVT ShiftTy = TLI.getShiftAmountTy(VT); + if (ShiftOp.getValueType() != ShiftTy) + ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); + + SDValue Ops[] = { LHSL, LHSH, ShiftOp }; Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); Hi = Lo.getValue(1); return; -- cgit v1.1 From 6a325cc46db9cb0c631ac832474e5df039d04b19 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 12:50:02 +0000 Subject: In the AttributeSetImpl c'tor, fill in the AttrNodes data structure with the attributes being passed in. 
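The decode loop in the c'tor (diff below) relies on the packed encoding of the
alignment attributes. A worked example of that arithmetic with illustrative raw
bits, assuming the alignment mask selects bits 16 and up as the decode implies:

    // Alignment is stored as log2(align) + 1 in the bits picked out by
    // AttributeImpl::getAttrMask(Attribute::Alignment), so a field value
    // of 5 decodes to a 16-byte alignment.
    uint64_t A = 5ULL << 16;                  // packed field
    uint64_t Align = 1ULL << ((A >> 16) - 1); // 1 << 4 == 16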
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173618 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 6 ++---- lib/IR/Attributes.cpp | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index d7ebec5..b4c788f 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -115,11 +115,9 @@ class AttributeSetImpl : public FoldingSetNode { void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; public: - AttributeSetImpl(LLVMContext &C, ArrayRef attrs) - : Context(C), AttrList(attrs.begin(), attrs.end()) {} + AttributeSetImpl(LLVMContext &C, ArrayRef attrs); AttributeSetImpl(LLVMContext &C, - ArrayRef > attrs) - : Context(C), AttrNodes(attrs.begin(), attrs.end()) {} + ArrayRef > attrs); LLVMContext &getContext() { return Context; } ArrayRef getAttributes() const { return AttrList; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8ec192b..780da00 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -525,6 +525,46 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, // AttributeSetImpl Definition //===----------------------------------------------------------------------===// +AttributeSetImpl:: +AttributeSetImpl(LLVMContext &C, + ArrayRef attrs) + : Context(C), AttrList(attrs.begin(), attrs.end()) { + for (unsigned I = 0, E = attrs.size(); I != E; ++I) { + const AttributeWithIndex &AWI = attrs[I]; + uint64_t Mask = AWI.Attrs.Raw(); + SmallVector Attrs; + + for (Attribute::AttrKind II = Attribute::None; + II != Attribute::EndAttrKinds; II = Attribute::AttrKind(II + 1)) { + if (uint64_t A = (Mask & AttributeImpl::getAttrMask(II))) { + AttrBuilder B; + + if (II == Attribute::Alignment) + B.addAlignmentAttr(1ULL << ((A >> 16) - 1)); + else if (II == Attribute::StackAlignment) + B.addStackAlignmentAttr(1ULL << ((A >> 26) - 1)); + else + B.addAttribute(II); + + Attrs.push_back(Attribute::get(C, B)); + } + } + + AttrNodes.push_back(std::make_pair(AWI.Index, + AttributeSetNode::get(C, Attrs))); + } +} + +AttributeSetImpl:: +AttributeSetImpl(LLVMContext &C, + ArrayRef > attrs) + : Context(C), AttrNodes(attrs.begin(), attrs.end()) { +} + +//===----------------------------------------------------------------------===// +// AttributeSet Method Implementations +//===----------------------------------------------------------------------===// + AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { // FIXME: Remove. return AttrList && hasAttributes(Idx) ? @@ -616,10 +656,6 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { return get(C, AttrList); } -//===----------------------------------------------------------------------===// -// AttributeSet Method Implementations -//===----------------------------------------------------------------------===// - const AttributeSet &AttributeSet::operator=(const AttributeSet &RHS) { AttrList = RHS.AttrList; return *this; -- cgit v1.1 From 022688c260f472b4befcd974a6c907c6830f308e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 27 Jan 2013 15:04:43 +0000 Subject: Legalizer: Add an assert and tweak a comment to clarify the assumptions this code makes. 
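Roughly, the assumption being asserted, with illustrative numbers: shifting an
N-bit value needs only Log2_32_Ceil(N) bits of shift amount, so the zext/trunc
of the operand cannot discard meaningful bits.

    // For the i64 halves produced by expansion the amounts lie in [0, 63],
    // so any shift-amount type of at least Log2_32_Ceil(64) == 6 bits, e.g.
    // i8 or i32, survives the cast unharmed.
    assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "ShiftAmountTy is too small to cover the range of this type!");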
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173620 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e08dd6d..88e72ec 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2098,9 +2098,13 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, EVT VT = LHSL.getValueType(); // If the shift amount operand is coming from a vector legalization it may - // not have the right return type. Fix that first by casting the operand. + // have an illegal type. Fix that first by casting the operand. Otherwise + // the new SHL_PARTS operation would need further legalization, and the + // legalizer assumes that illegal SHL_PARTS never occur. SDValue ShiftOp = N->getOperand(1); MVT ShiftTy = TLI.getShiftAmountTy(VT); + assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) && + "ShiftAmountTy is too small to cover the range of this type!"); if (ShiftOp.getValueType() != ShiftTy) ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); -- cgit v1.1 From 245b657ab636a505066ea6a81591a9a8b93604d2 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 27 Jan 2013 20:07:01 +0000 Subject: BBVectorize: Better use of TTI->getShuffleCost When flipping the pair of subvectors that form a vector, if the vector length is 2, we can use the SK_Reverse shuffle kind to get more-accurate cost information. Also we can use the SK_ExtractSubvector shuffle kind to get accurate subvector extraction costs. The current cost model implementations don't yet seem complex enough for this to make a difference (thus, there are no test cases with this commit), but it should help in future. Depending on how the various targets optimize and combine shuffles in practice, we might be able to get more-accurate costs by combining the costs of multiple shuffle kinds. For example, the cost of flipping the subvector pairs could be modeled as two extractions and two subvector insertions. These changes, however, should probably be motivated by specific test cases. 
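A sketch of the refined query pattern (actual costs are entirely
target-dependent):

    // When the pair length is 2, flipping the subvector pair is exactly a
    // reverse shuffle, so take the cheaper of the generic estimate and the
    // kind-specific cost.
    int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, VTy, VTy);
    if (VTy->getVectorNumElements() == 2)
      ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
                               TargetTransformInfo::SK_Reverse, VTy));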
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173621 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 4bd11c6..9bf09ff 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1824,6 +1824,16 @@ namespace { R->second == PairConnectionSplat) { int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, VTy, VTy); + + if (VTy->getVectorNumElements() == 2) { + if (R->second == PairConnectionSplat) + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_Broadcast, VTy)); + else + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_Reverse, VTy)); + } + DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << *Q->second.first << " <-> " << *Q->second.second << "} -> {" << @@ -1860,10 +1870,12 @@ namespace { if (NeedsExtraction) { int ESContrib; - if (Ty1->isVectorTy()) + if (Ty1->isVectorTy()) { ESContrib = (int) getInstrCost(Instruction::ShuffleVector, Ty1, VTy); - else + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1)); + } else ESContrib = (int) TTI->getVectorInstrCost( Instruction::ExtractElement, VTy, 0); @@ -1890,10 +1902,13 @@ namespace { if (NeedsExtraction) { int ESContrib; - if (Ty2->isVectorTy()) + if (Ty2->isVectorTy()) { ESContrib = (int) getInstrCost(Instruction::ShuffleVector, Ty2, VTy); - else + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_ExtractSubvector, VTy, + Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2)); + } else ESContrib = (int) TTI->getVectorInstrCost( Instruction::ExtractElement, VTy, 1); DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << @@ -1969,6 +1984,10 @@ namespace { } else if (IncomingPairs.count(VPR)) { ESContrib = (int) getInstrCost(Instruction::ShuffleVector, VTy, VTy); + + if (VTy->getVectorNumElements() == 2) + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_Reverse, VTy)); } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { ESContrib = (int) TTI->getVectorInstrCost( Instruction::InsertElement, VTy, 0); -- cgit v1.1 From f5e7e793f196cfba4427321ee9f38ecc8bb8470f Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 27 Jan 2013 20:42:57 +0000 Subject: [XCore] Add missing 0r instructions. These instructions are not targeted by the compiler but they are needed for the MC layer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173623 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 47 +++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 770b1fc..9ac9105 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -924,12 +924,21 @@ def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a), [(int_xcore_eeu GRRegs:$a)]>; // Zero operand short -// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, -// stet, getkep, getksp, setkep, getid, kret, dcall, dret, -// dentsp, drestsp def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>; +def DCALL_0R : _F0R<0b0000011100, (outs), (ins), "dcall", []>; + +let Defs = [SP], Uses = [SP] in +def DENTSP_0R : _F0R<0b0001001100, (outs), (ins), "dentsp", []>; + +let Defs = [SP] in +def DRESTSP_0R : _F0R<0b0001001101, (outs), (ins), "drestsp", []>; + +def DRET_0R : _F0R<0b0000011110, (outs), (ins), "dret", []>; + +def FREET_0R : _F0R<0b0000001111, (outs), (ins), "freet", []>; + let Defs = [R11] in { def GETID_0R : _F0R<0b0001001110, (outs), (ins), "get r11, id", @@ -942,12 +951,44 @@ def GETED_0R : _F0R<0b0000111110, (outs), (ins), def GETET_0R : _F0R<0b0000111111, (outs), (ins), "get r11, et", [(set R11, (int_xcore_getet))]>; + +def GETKEP_0R : _F0R<0b0001001111, (outs), (ins), + "get r11, kep", []>; + +def GETKSP_0R : _F0R<0b0001011100, (outs), (ins), + "get r11, ksp", []>; } +let Defs = [SP] in +def KRET_0R : _F0R<0b0000011101, (outs), (ins), "kret", []>; + +let Uses = [SP], mayLoad = 1 in { +def LDET_0R : _F0R<0b0001011110, (outs), (ins), "ldw et, sp[4]", []>; + +def LDSED_0R : _F0R<0b0001011101, (outs), (ins), "ldw sed, sp[3]", []>; + +def LDSPC_0R : _F0R<0b0000101100, (outs), (ins), "ldw spc, sp[1]", []>; + +def LDSSR_0R : _F0R<0b0000101110, (outs), (ins), "ldw ssr, sp[2]", []>; +} + +let Uses=[R11] in +def SETKEP_0R : _F0R<0b0000011111, (outs), (ins), "set kep, r11", []>; + def SSYNC_0r : _F0R<0b0000001110, (outs), (ins), "ssync", [(int_xcore_ssync)]>; +let Uses = [SP], mayStore = 1 in { +def STET_0R : _F0R<0b0000111101, (outs), (ins), "stw et, sp[4]", []>; + +def STSED_0R : _F0R<0b0000111100, (outs), (ins), "stw sed, sp[3]", []>; + +def STSPC_0R : _F0R<0b0000101101, (outs), (ins), "stw spc, sp[1]", []>; + +def STSSR_0R : _F0R<0b0000101111, (outs), (ins), "stw ssr, sp[2]", []>; +} + let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, hasSideEffects = 1 in def WAITEU_0R : _F0R<0b0000001100, (outs), (ins), -- cgit v1.1 From 9d2b1aef1b5bc8926c66b38f03583a77d015e921 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 27 Jan 2013 20:46:21 +0000 Subject: [XCore] Add missing 1r instructions. These instructions are not targeted by the compiler but they are needed for the MC layer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173624 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 9ac9105..185083a 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -853,9 +853,6 @@ def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2), [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; // One operand short -// TODO edu, eeu, waitet, waitef, tstart, clrtp -// setdp, setcp, setev, kcall -// dgetreg def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a), "msync res[$a]", [(int_xcore_msync GRRegs:$a)]>; @@ -879,9 +876,13 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i), [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>; let Defs=[SP], neverHasSideEffects=1 in -def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), - "set sp, $a", - []>; +def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>; + +let neverHasSideEffects=1 in +def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>; + +let neverHasSideEffects=1 in +def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>; let hasCtrlDep = 1 in def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a), @@ -919,10 +920,24 @@ def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a), [(int_xcore_setev GRRegs:$a, R11)]>; } +def DGETREG_1r : _F1R<0b001110, (outs GRRegs:$a), (ins), "dgetreg $a", []>; + +def EDU_1r : _F1R<0b000000, (outs), (ins GRRegs:$a), "edu res[$a]", []>; + def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a), "eeu res[$a]", [(int_xcore_eeu GRRegs:$a)]>; +def KCALL_1r : _F1R<0b010000, (outs), (ins GRRegs:$a), "kcall $a", []>; + +def WAITEF_1R : _F1R<0b000011, (outs), (ins GRRegs:$a), "waitef $a", []>; + +def WAITET_1R : _F1R<0b000010, (outs), (ins GRRegs:$a), "waitet $a", []>; + +def TSTART_1R : _F1R<0b000110, (outs), (ins GRRegs:$a), "start t[$a]", []>; + +def CLRPT_1R : _F1R<0b100000, (outs), (ins GRRegs:$a), "clrpt res[$a]", []>; + // Zero operand short def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>; -- cgit v1.1 From 0fef46f50e116e9aa0811ba8eb605d82ab64faed Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 27 Jan 2013 21:02:52 +0000 Subject: Legalizer: Reword comment again, per Duncan's suggestion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173625 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 88e72ec..182b7f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2098,9 +2098,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, EVT VT = LHSL.getValueType(); // If the shift amount operand is coming from a vector legalization it may - // have an illegal type. Fix that first by casting the operand. Otherwise - // the new SHL_PARTS operation would need further legalization, and the - // legalizer assumes that illegal SHL_PARTS never occur. + // have an illegal type. Fix that first by casting the operand, otherwise + // the new SHL_PARTS operation would need further legalization. 
SDValue ShiftOp = N->getOperand(1); MVT ShiftTy = TLI.getShiftAmountTy(VT); assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) && -- cgit v1.1 From a5372d270864316fb0752dd366f03fc1b45c6143 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 21:20:06 +0000 Subject: Start using more of the AttrNode in the AttributeSetImpl class. Also add some asserts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173627 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 9 ++++----- lib/IR/Attributes.cpp | 12 +++++++----- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index b4c788f..2aba3c4 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -116,15 +116,14 @@ class AttributeSetImpl : public FoldingSetNode { AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; public: AttributeSetImpl(LLVMContext &C, ArrayRef attrs); - AttributeSetImpl(LLVMContext &C, - ArrayRef > attrs); LLVMContext &getContext() { return Context; } ArrayRef getAttributes() const { return AttrList; } - unsigned getNumAttributes() const { return AttrList.size(); } + unsigned getNumAttributes() const { + return AttrNodes.size(); + } unsigned getSlotIndex(unsigned Slot) const { - // FIXME: This needs to use AttrNodes instead. - return AttrList[Slot].Index; + return AttrNodes[Slot].first; } AttributeSet getSlotAttributes(unsigned Slot) const { // FIXME: This needs to use AttrNodes instead. diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 780da00..00b542d 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -553,12 +553,14 @@ AttributeSetImpl(LLVMContext &C, AttrNodes.push_back(std::make_pair(AWI.Index, AttributeSetNode::get(C, Attrs))); } -} -AttributeSetImpl:: -AttributeSetImpl(LLVMContext &C, - ArrayRef > attrs) - : Context(C), AttrNodes(attrs.begin(), attrs.end()) { + assert(AttrNodes.size() == AttrList.size() && + "Number of attributes is different between lists!"); +#ifndef NDEBUG + for (unsigned I = 0, E = AttrNodes.size(); I != E; ++I) + assert((I == 0 || AttrNodes[I - 1].first < AttrNodes[I].first) && + "Attributes not in ascending order!"); +#endif } //===----------------------------------------------------------------------===// -- cgit v1.1 From ec2589863b32da169240c4fa120ef1e3798615d4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 21:23:46 +0000 Subject: s/AttrList/pImpl/g in AttributeSet. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173628 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 00b542d..b982b05 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -183,7 +183,7 @@ std::string Attribute::getAsString() const { AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) : Alignment(0), StackAlignment(0) { - AttributeSetImpl *pImpl = AS.AttrList; + AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; ArrayRef AttrList = pImpl->getAttributes(); @@ -569,16 +569,16 @@ AttributeSetImpl(LLVMContext &C, AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { // FIXME: Remove. - return AttrList && hasAttributes(Idx) ? - AttributeSet::get(AttrList->getContext(), + return pImpl && hasAttributes(Idx) ? 
+ AttributeSet::get(pImpl->getContext(), AttributeWithIndex::get(Idx, getAttributes(Idx))) : AttributeSet(); } AttributeSet AttributeSet::getRetAttributes() const { // FIXME: Remove. - return AttrList && hasAttributes(ReturnIndex) ? - AttributeSet::get(AttrList->getContext(), + return pImpl && hasAttributes(ReturnIndex) ? + AttributeSet::get(pImpl->getContext(), AttributeWithIndex::get(ReturnIndex, getAttributes(ReturnIndex))) : AttributeSet(); @@ -586,8 +586,8 @@ AttributeSet AttributeSet::getRetAttributes() const { AttributeSet AttributeSet::getFnAttributes() const { // FIXME: Remove. - return AttrList && hasAttributes(FunctionIndex) ? - AttributeSet::get(AttrList->getContext(), + return pImpl && hasAttributes(FunctionIndex) ? + AttributeSet::get(pImpl->getContext(), AttributeWithIndex::get(FunctionIndex, getAttributes(FunctionIndex))) : AttributeSet(); @@ -651,15 +651,15 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { for (ArrayRef::iterator I = Attrs.begin(), E = Attrs.end(); I != E; ++I) { AttributeSet AS = *I; - if (!AS.AttrList) continue; - AttrList.append(AS.AttrList->AttrList.begin(), AS.AttrList->AttrList.end()); + if (!AS.pImpl) continue; + AttrList.append(AS.pImpl->AttrList.begin(), AS.pImpl->AttrList.end()); } return get(C, AttrList); } const AttributeSet &AttributeSet::operator=(const AttributeSet &RHS) { - AttrList = RHS.AttrList; + pImpl = RHS.pImpl; return *this; } @@ -667,19 +667,19 @@ const AttributeSet &AttributeSet::operator=(const AttributeSet &RHS) { /// This is the number of arguments that have an attribute set on them /// (including the function itself). unsigned AttributeSet::getNumSlots() const { - return AttrList ? AttrList->getNumAttributes() : 0; + return pImpl ? pImpl->getNumAttributes() : 0; } unsigned AttributeSet::getSlotIndex(unsigned Slot) const { - assert(AttrList && Slot < AttrList->getNumAttributes() && + assert(pImpl && Slot < pImpl->getNumAttributes() && "Slot # out of range!"); - return AttrList->getSlotIndex(Slot); + return pImpl->getSlotIndex(Slot); } AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { - assert(AttrList && Slot < AttrList->getNumAttributes() && + assert(pImpl && Slot < pImpl->getNumAttributes() && "Slot # out of range!"); - return AttrList->getSlotAttributes(Slot); + return pImpl->getSlotAttributes(Slot); } bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ @@ -709,9 +709,9 @@ uint64_t AttributeSet::Raw(unsigned Index) const { /// getAttributes - The attributes for the specified index are returned. Attribute AttributeSet::getAttributes(unsigned Idx) const { - if (AttrList == 0) return Attribute(); + if (pImpl == 0) return Attribute(); - ArrayRef Attrs = AttrList->getAttributes(); + ArrayRef Attrs = pImpl->getAttributes(); for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i) if (Attrs[i].Index == Idx) return Attrs[i].Attrs; @@ -722,9 +722,9 @@ Attribute AttributeSet::getAttributes(unsigned Idx) const { /// hasAttrSomewhere - Return true if the specified attribute is set for at /// least one parameter or for the return value. 
bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { - if (AttrList == 0) return false; + if (pImpl == 0) return false; - ArrayRef Attrs = AttrList->getAttributes(); + ArrayRef Attrs = pImpl->getAttributes(); for (unsigned i = 0, e = Attrs.size(); i != e; ++i) if (Attrs[i].Attrs.hasAttribute(Attr)) return true; @@ -760,10 +760,10 @@ AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, return *this; SmallVector NewAttrList; - if (AttrList == 0) + if (pImpl == 0) NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); else { - ArrayRef OldAttrList = AttrList->getAttributes(); + ArrayRef OldAttrList = pImpl->getAttributes(); unsigned i = 0, e = OldAttrList.size(); // Copy attributes for arguments before this one. for (; i != e && OldAttrList[i].Index < Idx; ++i) @@ -805,7 +805,7 @@ AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, assert(!Attrs.hasAttribute(Attribute::Alignment) && "Attempt to exclude alignment!"); #endif - if (AttrList == 0) return AttributeSet(); + if (pImpl == 0) return AttributeSet(); Attribute OldAttrs = getAttributes(Idx); AttrBuilder NewAttrs = @@ -814,7 +814,7 @@ AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, return *this; SmallVector NewAttrList; - ArrayRef OldAttrList = AttrList->getAttributes(); + ArrayRef OldAttrList = pImpl->getAttributes(); unsigned i = 0, e = OldAttrList.size(); // Copy attributes for arguments before this one. -- cgit v1.1 From b719d8b1007f6b31ae6d1a66258a26e6a91749bc Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 27 Jan 2013 21:26:02 +0000 Subject: [XCore] Add missing l2r instructions. These instructions are not targeted by the compiler but they are needed for the MC layer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173629 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 185083a..613f9cb 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -811,7 +811,6 @@ def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src), [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; // Two operand long -// getd, testlcl def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; @@ -824,6 +823,12 @@ def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; +def GETD_l2r : _FL2R<0b0001111001, (outs GRRegs:$dst), (ins GRRegs:$src), + "getd $dst, res[$src]", []>; + +def GETN_l2r : _FL2R<0b0011011001, (outs GRRegs:$dst), (ins GRRegs:$src), + "getn $dst, res[$src]", []>; + def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val), "setc res[$r], $val", [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; @@ -848,10 +853,16 @@ def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2), "setclk res[$src1], $src2", [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>; +def SETN_l2r : _FLR2R<0b0011011000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setn res[$src1], $src2", []>; + def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2), "setrdy res[$src1], $src2", [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; +def TESTLCL_l2r : _FL2R<0b0010011000, (outs GRRegs:$dst), (ins GRRegs:$src), + "testlcl $dst, res[$src]", 
[]>;
+
 // One operand short
 def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a),
                     "msync res[$a]",
-- cgit v1.1


From 893eac1648151ca42e3d0f9e9da092d817c9f8ba Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sun, 27 Jan 2013 21:32:11 +0000
Subject: Add some helpful comments.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173631 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/AttributeImpl.h | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 2aba3c4..c565364 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -117,14 +117,23 @@ class AttributeSetImpl : public FoldingSetNode {
 public:
   AttributeSetImpl(LLVMContext &C, ArrayRef attrs);

+  /// \brief Get the context that created this AttributeSetImpl.
   LLVMContext &getContext() { return Context; }
+
   ArrayRef getAttributes() const { return AttrList; }
-  unsigned getNumAttributes() const {
-    return AttrNodes.size();
-  }
-  unsigned getSlotIndex(unsigned Slot) const {
-    return AttrNodes[Slot].first;
-  }
+
+  /// \brief Return the number of attributes this AttributeSet contains.
+  unsigned getNumAttributes() const { return AttrNodes.size(); }
+
+  /// \brief Get the index of the given "slot" in the AttrNodes list. This index
+  /// is the index of the return, parameter, or function object that the
+  /// attributes are applied to, not the index into the AttrNodes list where the
+  /// attributes reside.
+  unsigned getSlotIndex(unsigned Slot) const { return AttrNodes[Slot].first; }
+
+  /// \brief Retrieve the attributes for the given "slot" in the AttrNode list.
+  /// \p Slot is an index into the AttrNodes list, not the index of the return /
+  /// parameter/ function which the attributes apply to.
   AttributeSet getSlotAttributes(unsigned Slot) const {
     // FIXME: This needs to use AttrNodes instead.
     return AttributeSet::get(Context, AttrList[Slot]);
-- cgit v1.1


From 1cc0d5a4311c2d4bc01051561549390307b789a1 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sun, 27 Jan 2013 21:38:03 +0000
Subject: Privatize some of the copy c'tors and assignment operators of
 uniquified objects.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173632 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/AttributeImpl.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'lib')

diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index c565364..b4fb0c0 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -32,6 +32,10 @@ class AttributeImpl : public FoldingSetNode {
   LLVMContext &Context;
   Constant *Data;
   SmallVector Vals;
+
+  // AttributesImpl is uniqued, these should not be publicly available.
+  void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION;
+  AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION;
 public:
   explicit AttributeImpl(LLVMContext &C, uint64_t data);
   explicit AttributeImpl(LLVMContext &C, Attribute::AttrKind data);
@@ -78,6 +82,10 @@ class AttributeSetNode : public FoldingSetNode {

   AttributeSetNode(ArrayRef Attrs)
     : AttrList(Attrs.begin(), Attrs.end()) {}
+
+  // AttributesSetNode is uniqued, these should not be publicly available.
+ void operator=(const AttributeSetNode &) LLVM_DELETED_FUNCTION; + AttributeSetNode(const AttributeSetNode &) LLVM_DELETED_FUNCTION; public: static AttributeSetNode *get(LLVMContext &C, ArrayRef Attrs); -- cgit v1.1 From 970a479c02a418726950580e13136acd2a2dc13f Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 27 Jan 2013 22:28:30 +0000 Subject: [XCore] Add missing l2rus instructions. These instructions are not targeted by the compiler but they are needed for the MC layer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 6 ++++++ lib/Target/XCore/XCoreInstrInfo.td | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index a94f5b9..c995a9c 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -449,6 +449,12 @@ DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, case 0x12c: Inst.setOpcode(XCore::ASHR_l2rus); return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x12d: + Inst.setOpcode(XCore::OUTPW_l2rus); + return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x12e: + Inst.setOpcode(XCore::INPW_l2rus); + return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); case 0x13c: Inst.setOpcode(XCore::LDAWF_l2rus); return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder); diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 613f9cb..befc096 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -451,7 +451,6 @@ def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst), (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2, GRRegs:$src3))]>; -// TODO inpw, outpw let mayStore=1 in { def ST16_l3r : _FL3R<0b100001100, (outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), @@ -462,6 +461,14 @@ def ST8_l3r : _FL3R<0b100011100, (outs), "st8 $val, $addr[$offset]", []>; } +def INPW_l2rus : _FL2RUSBitp<0b100101110, (outs GRRegs:$a), + (ins GRRegs:$b, i32imm:$c), "inpw $a, res[$b], $c", + []>; + +def OUTPW_l2rus : _FL2RUSBitp<0b100101101, (outs), + (ins GRRegs:$a, GRRegs:$b, i32imm:$c), + "outpw res[$b], $a, $c", []>; + // Four operand long let Constraints = "$e = $a,$f = $b" in { def MACCU_l4r : _FL4RSrcDstSrcDst< -- cgit v1.1 From c08a5ef6581f2c7550e92d31f63cd65ec29c39e0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 22:43:04 +0000 Subject: Add special 'get' methods to create an Attribute with an alignment. Also do some random cleanup. No functionality change. 
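A hypothetical call site for the two new helpers, which wrap the equivalent
AttrBuilder sequence:

    // Same effect as an AttrBuilder plus addAlignmentAttr /
    // addStackAlignmentAttr, but clearer where only one value is set.
    Attribute AlignAttr = Attribute::getWithAlignment(Ctx, 16);
    Attribute SSPAttr   = Attribute::getWithStackAlignment(Ctx, 8);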
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173635 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index b982b05..1b057bb 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -62,6 +62,17 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { return Attribute(PA); } +Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { + AttrBuilder B; + return get(Context, B.addAlignmentAttr(Align)); +} + +Attribute Attribute::getWithStackAlignment(LLVMContext &Context, + uint64_t Align) { + AttrBuilder B; + return get(Context, B.addStackAlignmentAttr(Align)); +} + bool Attribute::hasAttribute(AttrKind Val) const { return pImpl && pImpl->hasAttribute(Val); } -- cgit v1.1 From d05204aea4977eaec25e96bc7605a7bb9d806fc0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 23:41:29 +0000 Subject: Push the calculation of the 'Raw' attribute mask down into the implementation. It in turn uses the correct list for calculating the 'Raw' value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173637 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 3 +++ lib/IR/Attributes.cpp | 31 ++++++++++++++++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index b4fb0c0..babc14e 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -165,6 +165,9 @@ public: ID.AddPointer(Nodes[i].second); } } + + // FIXME: This atrocity is temporary. + uint64_t Raw(uint64_t Index) const; }; } // end llvm namespace diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 1b057bb..81390f0 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -574,6 +574,24 @@ AttributeSetImpl(LLVMContext &C, #endif } +uint64_t AttributeSetImpl::Raw(uint64_t Index) const { + for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) { + if (getSlotIndex(I) != Index) continue; + const AttributeSetNode *ASN = AttrNodes[I].second; + AttrBuilder B; + + for (AttributeSetNode::const_iterator II = ASN->begin(), + IE = ASN->end(); II != IE; ++II) + B.addAttributes(*II); + + assert(B.Raw() == AttrList[I].Attrs.Raw() && + "Attributes aren't the same!"); + return B.Raw(); + } + + return 0; +} + //===----------------------------------------------------------------------===// // AttributeSet Method Implementations //===----------------------------------------------------------------------===// @@ -669,14 +687,9 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { return get(C, AttrList); } -const AttributeSet &AttributeSet::operator=(const AttributeSet &RHS) { - pImpl = RHS.pImpl; - return *this; -} - -/// getNumSlots - Return the number of slots used in this attribute list. -/// This is the number of arguments that have an attribute set on them -/// (including the function itself). +/// \brief Return the number of slots used in this attribute list. This is the +/// number of arguments that have an attribute set on them (including the +/// function itself). unsigned AttributeSet::getNumSlots() const { return pImpl ? pImpl->getNumAttributes() : 0; } @@ -715,7 +728,7 @@ unsigned AttributeSet::getStackAlignment(unsigned Index) const { uint64_t AttributeSet::Raw(unsigned Index) const { // FIXME: Remove this. - return getAttributes(Index).Raw(); + return pImpl ? 
pImpl->Raw(Index) : 0; } /// getAttributes - The attributes for the specified index are returned. -- cgit v1.1 From 5f33912f91ec2673545a5e000cc7bf7a0ecccdbd Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 23:49:44 +0000 Subject: Use proper return type for attribute index. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173638 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index babc14e..f9a716c 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -137,7 +137,7 @@ public: /// is the index of the return, parameter, or function object that the /// attributes are applied to, not the index into the AttrNodes list where the /// attributes reside. - unsigned getSlotIndex(unsigned Slot) const { return AttrNodes[Slot].first; } + uint64_t getSlotIndex(unsigned Slot) const { return AttrNodes[Slot].first; } /// \brief Retrieve the attributes for the given "slot" in the AttrNode list. /// \p Slot is an index into the AttrNodes list, not the index of the return / -- cgit v1.1 From 3e3e789aede6ec38d39c95d88ad4e8634d5a259b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 23:50:44 +0000 Subject: Use proper return type for attribute index. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173639 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 81390f0..8e64a49 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -694,7 +694,7 @@ unsigned AttributeSet::getNumSlots() const { return pImpl ? pImpl->getNumAttributes() : 0; } -unsigned AttributeSet::getSlotIndex(unsigned Slot) const { +uint64_t AttributeSet::getSlotIndex(unsigned Slot) const { assert(pImpl && Slot < pImpl->getNumAttributes() && "Slot # out of range!"); return pImpl->getSlotIndex(Slot); -- cgit v1.1 From 49716e5d84142d7bd3eeff7304f9bc708bff99d0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 27 Jan 2013 23:53:56 +0000 Subject: Improve the debug output a bit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8e64a49..361f3d6 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -863,8 +863,13 @@ AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, void AttributeSet::dump() const { dbgs() << "PAL[ "; for (unsigned i = 0; i < getNumSlots(); ++i) { - unsigned Index = getSlotIndex(i); - dbgs() << "{ " << Index << " => " << getAsString(Index) << " } "; + uint64_t Index = getSlotIndex(i); + dbgs() << " { "; + if (Index == ~0U) + dbgs() << "~0U"; + else + dbgs() << Index; + dbgs() << " => " << getAsString(Index) << " }\n"; } dbgs() << "]\n"; -- cgit v1.1 From 73bc452bcd63620daefb0a3bc613746af7418076 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 00:21:34 +0000 Subject: Remove a use of AttributeWithIndex. We want to remove AttributeWithIndex because it provides a non-encapsulated view of the AttributeSetImpl object. Instead, use accessor methods and iterators. Eventually, this code can be simplified because the Attribute object will hold only one attribute instead of multiple attributes. 
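Sketched from the replacement code in the diff below, this is the encapsulated
iteration pattern the series is converging on:

    // Walk the slots through accessors instead of reading the raw
    // AttributeWithIndex list; each slot pairs an index with its attributes.
    for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) {
      if (pImpl->getSlotIndex(I) != Idx) continue;
      for (AttributeSetNode::const_iterator II = pImpl->begin(I),
             IE = pImpl->end(I); II != IE; ++II)
        B.addAttributes(*II);
      break;
    }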
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173641 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 20 ++++++++++++++++++-- lib/IR/Attributes.cpp | 26 +++++++++++++++----------- 2 files changed, 33 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index f9a716c..8d5de77 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -117,7 +117,8 @@ class AttributeSetImpl : public FoldingSetNode { LLVMContext &Context; SmallVector AttrList; - SmallVector, 4> AttrNodes; + typedef std::pair IndexAttrPair; + SmallVector AttrNodes; // AttributesSet is uniqued, these should not be publicly available. void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; @@ -137,7 +138,9 @@ public: /// is the index of the return, parameter, or function object that the /// attributes are applied to, not the index into the AttrNodes list where the /// attributes reside. - uint64_t getSlotIndex(unsigned Slot) const { return AttrNodes[Slot].first; } + uint64_t getSlotIndex(unsigned Slot) const { + return AttrNodes[Slot].first; + } /// \brief Retrieve the attributes for the given "slot" in the AttrNode list. /// \p Slot is an index into the AttrNodes list, not the index of the return / @@ -147,6 +150,19 @@ public: return AttributeSet::get(Context, AttrList[Slot]); } + typedef AttributeSetNode::iterator iterator; + typedef AttributeSetNode::const_iterator const_iterator; + + iterator begin(unsigned Idx) + { return AttrNodes[Idx].second->begin(); } + iterator end(unsigned Idx) + { return AttrNodes[Idx].second->end(); } + + const_iterator begin(unsigned Idx) const + { return AttrNodes[Idx].second->begin(); } + const_iterator end(unsigned Idx) const + { return AttrNodes[Idx].second->end(); } + void Profile(FoldingSetNodeID &ID) const { Profile(ID, AttrList); } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 361f3d6..59d3ef0 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -197,17 +197,21 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; - ArrayRef AttrList = pImpl->getAttributes(); - const AttributeWithIndex *AWI = 0; - for (unsigned I = 0, E = AttrList.size(); I != E; ++I) - if (AttrList[I].Index == Idx) { - AWI = &AttrList[I]; - break; - } + AttrBuilder B; + + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { + if (pImpl->getSlotIndex(I) != Idx) continue; + + for (AttributeSetNode::const_iterator II = pImpl->begin(I), + IE = pImpl->end(I); II != IE; ++II) + B.addAttributes(*II); + + break; + } - if (!AWI) return; + if (!B.hasAttributes()) return; - uint64_t Mask = AWI->Attrs.Raw(); + uint64_t Mask = B.Raw(); for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { @@ -861,8 +865,8 @@ AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, } void AttributeSet::dump() const { - dbgs() << "PAL[ "; - for (unsigned i = 0; i < getNumSlots(); ++i) { + dbgs() << "PAL[\n"; + for (unsigned i = 0, e = getNumSlots(); i < e; ++i) { uint64_t Index = getSlotIndex(i); dbgs() << " { "; if (Index == ~0U) -- cgit v1.1 From 70cdaaae925862b9d52d41729e93cf6417c3370b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 00:46:02 +0000 Subject: Remove another use of AttributeWithIndex, using the AttributeSetImpl accessors instead. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173642 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 59d3ef0..c41fb318 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -752,10 +752,11 @@ Attribute AttributeSet::getAttributes(unsigned Idx) const { bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { if (pImpl == 0) return false; - ArrayRef Attrs = pImpl->getAttributes(); - for (unsigned i = 0, e = Attrs.size(); i != e; ++i) - if (Attrs[i].Attrs.hasAttribute(Attr)) - return true; + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) + for (AttributeSetImpl::iterator II = pImpl->begin(I), + IE = pImpl->end(I); II != IE; ++II) + if (II->hasAttribute(Attr)) + return true; return false; } @@ -788,11 +789,12 @@ AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, return *this; SmallVector NewAttrList; - if (pImpl == 0) + if (pImpl == 0) { NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); - else { + } else { ArrayRef OldAttrList = pImpl->getAttributes(); unsigned i = 0, e = OldAttrList.size(); + // Copy attributes for arguments before this one. for (; i != e && OldAttrList[i].Index < Idx; ++i) NewAttrList.push_back(OldAttrList[i]); -- cgit v1.1 From e2501f56a620be88665d04dc481f40ef3275eea0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 01:11:42 +0000 Subject: Remove another use of AttributeWithIndex, using the AttributeSetImpl accessors instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173644 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index c41fb318..6745486 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -735,14 +735,22 @@ uint64_t AttributeSet::Raw(unsigned Index) const { return pImpl ? pImpl->Raw(Index) : 0; } -/// getAttributes - The attributes for the specified index are returned. +/// \brief The attributes for the specified index are returned. +/// +/// FIXME: This shouldn't return 'Attribute'. Attribute AttributeSet::getAttributes(unsigned Idx) const { if (pImpl == 0) return Attribute(); - ArrayRef Attrs = pImpl->getAttributes(); - for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i) - if (Attrs[i].Index == Idx) - return Attrs[i].Attrs; + // Loop through to find the attribute we want. + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { + if (pImpl->getSlotIndex(I) != Idx) continue; + + AttrBuilder B; + for (AttributeSetImpl::const_iterator II = pImpl->begin(I), + IE = pImpl->end(I); II != IE; ++II) + B.addAttributes(*II); + return Attribute::get(pImpl->getContext(), B); + } return Attribute(); } @@ -753,7 +761,7 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { if (pImpl == 0) return false; for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) - for (AttributeSetImpl::iterator II = pImpl->begin(I), + for (AttributeSetImpl::const_iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) if (II->hasAttribute(Attr)) return true; -- cgit v1.1 From a9b9f0ad802ef6973534a9a247716e4a8e079635 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 01:30:29 +0000 Subject: Use proper type for the index. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 6745486..544c3cf 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -710,27 +710,27 @@ AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { return pImpl->getSlotAttributes(Slot); } -bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ +bool AttributeSet::hasAttribute(uint64_t Index, Attribute::AttrKind Kind) const{ return getAttributes(Index).hasAttribute(Kind); } -bool AttributeSet::hasAttributes(unsigned Index) const { +bool AttributeSet::hasAttributes(uint64_t Index) const { return getAttributes(Index).hasAttributes(); } -std::string AttributeSet::getAsString(unsigned Index) const { +std::string AttributeSet::getAsString(uint64_t Index) const { return getAttributes(Index).getAsString(); } -unsigned AttributeSet::getParamAlignment(unsigned Idx) const { +unsigned AttributeSet::getParamAlignment(uint64_t Idx) const { return getAttributes(Idx).getAlignment(); } -unsigned AttributeSet::getStackAlignment(unsigned Index) const { +unsigned AttributeSet::getStackAlignment(uint64_t Index) const { return getAttributes(Index).getStackAlignment(); } -uint64_t AttributeSet::Raw(unsigned Index) const { +uint64_t AttributeSet::Raw(uint64_t Index) const { // FIXME: Remove this. return pImpl ? pImpl->Raw(Index) : 0; } @@ -738,7 +738,7 @@ uint64_t AttributeSet::Raw(unsigned Index) const { /// \brief The attributes for the specified index are returned. /// /// FIXME: This shouldn't return 'Attribute'. -Attribute AttributeSet::getAttributes(unsigned Idx) const { +Attribute AttributeSet::getAttributes(uint64_t Idx) const { if (pImpl == 0) return Attribute(); // Loop through to find the attribute we want. -- cgit v1.1 From 24c4898973a074713201fb9351d302b9f7733e92 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 28 Jan 2013 01:35:51 +0000 Subject: Extracted ObjCARC.cpp into its own library libLLVMObjCARCOpts in preparation for refactoring the ARC Optimizer. 
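A hypothetical consumer after the split (header locations assumed from the new
files below): tools that previously picked these passes up through the Scalar
library now link LLVMObjCARCOpts and initialize it explicitly.

    #include "llvm/Transforms/ObjCARC.h"
    #include "llvm/PassRegistry.h"

    void initForMyTool(llvm::PassRegistry &Registry) {
      // Registers the ARC optimization, contract, expand, and
      // autorelease-pool elimination passes plus the ARC alias analysis.
      llvm::initializeObjCARCOpts(Registry);
    }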
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173647 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LLVMBuild.txt | 2 +- lib/Transforms/CMakeLists.txt | 1 + lib/Transforms/IPO/LLVMBuild.txt | 2 +- lib/Transforms/LLVMBuild.txt | 2 +- lib/Transforms/Makefile | 2 +- lib/Transforms/ObjCARC/CMakeLists.txt | 6 + lib/Transforms/ObjCARC/LLVMBuild.txt | 23 + lib/Transforms/ObjCARC/Makefile | 15 + lib/Transforms/ObjCARC/ObjCARC.cpp | 38 + lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 4568 ++++++++++++++++++++++++++++++++ lib/Transforms/Scalar/CMakeLists.txt | 1 - lib/Transforms/Scalar/ObjCARC.cpp | 4568 -------------------------------- lib/Transforms/Scalar/Scalar.cpp | 5 - 13 files changed, 4655 insertions(+), 4578 deletions(-) create mode 100644 lib/Transforms/ObjCARC/CMakeLists.txt create mode 100644 lib/Transforms/ObjCARC/LLVMBuild.txt create mode 100644 lib/Transforms/ObjCARC/Makefile create mode 100644 lib/Transforms/ObjCARC/ObjCARC.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCOpts.cpp delete mode 100644 lib/Transforms/Scalar/ObjCARC.cpp (limited to 'lib') diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index fee0347..81ef1aa 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core MC Scalar Support Target TransformUtils +required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index de1353e..2bb6e90 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -5,3 +5,4 @@ add_subdirectory(Scalar) add_subdirectory(IPO) add_subdirectory(Vectorize) add_subdirectory(Hello) +add_subdirectory(ObjCARC) diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index b18c915..124cbb6 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils +required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils ObjCARC diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index f7bca06..15e9fba 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize +subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC [component_0] type = Group diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 8b1df92..c390517 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. 
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello +PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC include $(LEVEL)/Makefile.config diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt new file mode 100644 index 0000000..ab17267 --- /dev/null +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMObjCARCOpts + ObjCARC.cpp + ObjCARCOpts.cpp + ) + +add_dependencies(LLVMObjCARCOpts intrinsics_gen) diff --git a/lib/Transforms/ObjCARC/LLVMBuild.txt b/lib/Transforms/ObjCARC/LLVMBuild.txt new file mode 100644 index 0000000..61eced0 --- /dev/null +++ b/lib/Transforms/ObjCARC/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/ObjCARC/LLVMBuild.txt --- ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ObjCARC +parent = Transforms +library_name = ObjCARCOpts +required_libraries = Analysis Core InstCombine Support Target TransformUtils diff --git a/lib/Transforms/ObjCARC/Makefile b/lib/Transforms/ObjCARC/Makefile new file mode 100644 index 0000000..2a34e21 --- /dev/null +++ b/lib/Transforms/ObjCARC/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/ObjCARC/Makefile ---------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMObjCARCOpts +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp new file mode 100644 index 0000000..d4e3149 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -0,0 +1,38 @@ +//===-- ObjCARC.cpp --------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMObjCARCOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/ObjCARC.h" +#include "llvm-c/Initialization.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassManager.h" + +using namespace llvm; + +/// initializeObjCARCOptsPasses - Initialize all passes linked into the +/// ObjCARCOpts library. 
+void llvm::initializeObjCARCOpts(PassRegistry &Registry) { + initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCAPElimPass(Registry); + initializeObjCARCExpandPass(Registry); + initializeObjCARCContractPass(Registry); + initializeObjCARCOptPass(Registry); +} + +void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R) { + initializeObjCARCOpts(*unwrap(R)); +} diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp new file mode 100644 index 0000000..411da64 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -0,0 +1,4568 @@ +//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// The optimizations performed include elimination of redundant, partially +/// redundant, and inconsequential reference count operations, elimination of +/// redundant weak pointer operations, pattern-matching and replacement of +/// low-level operations into higher-level operations, and numerous minor +/// simplifications. +/// +/// This file also defines a simple ARC-aware AliasAnalysis. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// \brief A handy option to enable/disable all optimizations in this file. +static cl::opt EnableARCOpts("enable-objc-arc-opts", cl::init(true)); + +/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. +/// @{ + +namespace { + /// \brief An associative container with fast insertion-order (deterministic) + /// iteration over its elements. Plus the special blot operation. + template + class MapVector { + /// Map keys to indices in Vector. + typedef DenseMap MapTy; + MapTy Map; + + typedef std::vector > VectorTy; + /// Keys and values. + VectorTy Vector; + + public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~MapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. 
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), + E = Vector.end(); I != E; ++I) + assert(!I->first || + (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](const KeyT &Arg) { + std::pair Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector[Num].second; + } + return Vector[Pair.first->second].second; + } + + std::pair + insert(const std::pair &InsertPair) { + std::pair Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(InsertPair); + return std::make_pair(Vector.begin() + Num, true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + const_iterator find(const KeyT &Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + + /// This is similar to erase, but instead of removing the element from the + /// vector, it just zeros out the key in the vector. This leaves iterators + /// intact, but clients must be prepared for zeroed-out keys when iterating. + void blot(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + }; +} + +/// @} +/// +/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. +/// @{ + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/Utils/Local.h" + +namespace { + /// \enum InstructionClass + /// \brief A simple classification for instructions. + enum InstructionClass { + IC_Retain, ///< objc_retain + IC_RetainRV, ///< objc_retainAutoreleasedReturnValue + IC_RetainBlock, ///< objc_retainBlock + IC_Release, ///< objc_release + IC_Autorelease, ///< objc_autorelease + IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue + IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush + IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop + IC_NoopCast, ///< objc_retainedObject, etc. 
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_StoreStrong, ///< objc_storeStrong (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+ };
+
+ raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class)
+ LLVM_ATTRIBUTE_USED;
+ raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) {
+ switch (Class) {
+ case IC_Retain:
+ return OS << "IC_Retain";
+ case IC_RetainRV:
+ return OS << "IC_RetainRV";
+ case IC_RetainBlock:
+ return OS << "IC_RetainBlock";
+ case IC_Release:
+ return OS << "IC_Release";
+ case IC_Autorelease:
+ return OS << "IC_Autorelease";
+ case IC_AutoreleaseRV:
+ return OS << "IC_AutoreleaseRV";
+ case IC_AutoreleasepoolPush:
+ return OS << "IC_AutoreleasepoolPush";
+ case IC_AutoreleasepoolPop:
+ return OS << "IC_AutoreleasepoolPop";
+ case IC_NoopCast:
+ return OS << "IC_NoopCast";
+ case IC_FusedRetainAutorelease:
+ return OS << "IC_FusedRetainAutorelease";
+ case IC_FusedRetainAutoreleaseRV:
+ return OS << "IC_FusedRetainAutoreleaseRV";
+ case IC_LoadWeakRetained:
+ return OS << "IC_LoadWeakRetained";
+ case IC_StoreWeak:
+ return OS << "IC_StoreWeak";
+ case IC_InitWeak:
+ return OS << "IC_InitWeak";
+ case IC_LoadWeak:
+ return OS << "IC_LoadWeak";
+ case IC_MoveWeak:
+ return OS << "IC_MoveWeak";
+ case IC_CopyWeak:
+ return OS << "IC_CopyWeak";
+ case IC_DestroyWeak:
+ return OS << "IC_DestroyWeak";
+ case IC_StoreStrong:
+ return OS << "IC_StoreStrong";
+ case IC_CallOrUser:
+ return OS << "IC_CallOrUser";
+ case IC_Call:
+ return OS << "IC_Call";
+ case IC_User:
+ return OS << "IC_User";
+ case IC_None:
+ return OS << "IC_None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+ }
+}
+
+/// \brief Test whether the given value is possibly a retainable object pointer.
+static bool IsPotentialRetainableObjPtr(const Value *Op) {
+ // Pointers to static or stack storage are not valid retainable object pointers.
+ if (isa(Op) || isa(Op))
+ return false;
+ // Special arguments cannot be valid retainable object pointers.
+ if (const Argument *Arg = dyn_cast(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types.
+ //
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never retainable object pointers; however, clang occasionally
+ // bitcasts retainable object pointers to function-pointer type temporarily.
+ PointerType *Ty = dyn_cast(Op->getType());
+ if (!Ty)
+ return false;
+ // Conservatively assume anything else is a potential retainable object pointer.
+ return true;
+}
+
+/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialRetainableObjPtr(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No arguments.
+ if (AI == AE)
+ return StringSwitch(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast(A0->getType()))
+ if (PointerType *Pte = dyn_cast(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Case("objc_storeStrong", IC_StoreStrong)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// \brief Determine what kind of construct V is.
+static InstructionClass GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast(V)) {
+ // Any instruction other than bitcast and gep with a pointer operand has a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a shortcut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
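
As a reading aid for the intrinsic whitelist that the switch below encodes, the rule can be restated as a standalone predicate. This is a sketch only; the helper name is invented here and the real code simply returns IC_None inline for the full list of cases:

  #include "llvm/IR/Intrinsics.h"

  // Hypothetical restatement: these intrinsics can neither release nor "use"
  // a retainable object pointer, so they classify as IC_None.
  static bool IntrinsicIsInertForARC(llvm::Intrinsic::ID ID) {
    switch (ID) {
    case llvm::Intrinsic::returnaddress:
    case llvm::Intrinsic::frameaddress:
    case llvm::Intrinsic::stacksave:
    case llvm::Intrinsic::stackrestore:
    case llvm::Intrinsic::dbg_declare:
    case llvm::Intrinsic::dbg_value:
      return true;  // a representative subset of the cases handled below
    default:
      return false; // everything else falls through to GetCallSiteClass
    }
  }

The actual switch in GetInstructionClass continues here:
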
+ switch (F->getIntrinsicID()) { + case Intrinsic::returnaddress: case Intrinsic::frameaddress: + case Intrinsic::stacksave: case Intrinsic::stackrestore: + case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + case Intrinsic::objectsize: case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: case Intrinsic::invariant_end: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. + return IC_None; + default: + break; + } + } + return GetCallSiteClass(CI); + } + case Instruction::Invoke: + return GetCallSiteClass(cast(I)); + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: case Instruction::PHI: + case Instruction::Ret: case Instruction::Br: + case Instruction::Switch: case Instruction::IndirectBr: + case Instruction::Alloca: case Instruction::VAArg: + case Instruction::Add: case Instruction::FAdd: + case Instruction::Sub: case Instruction::FSub: + case Instruction::Mul: case Instruction::FMul: + case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: + case Instruction::SRem: case Instruction::URem: case Instruction::FRem: + case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: + case Instruction::And: case Instruction::Or: case Instruction::Xor: + case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: + case Instruction::IntToPtr: case Instruction::FCmp: + case Instruction::FPTrunc: case Instruction::FPExt: + case Instruction::FPToUI: case Instruction::FPToSI: + case Instruction::UIToFP: case Instruction::SIToFP: + case Instruction::InsertElement: case Instruction::ExtractElement: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + break; + case Instruction::ICmp: + // Comparing a pointer with null, or any other constant, isn't an + // interesting use, because we don't care what the pointer points to, or + // about the values of any other dynamic reference-counted pointers. + if (IsPotentialRetainableObjPtr(I->getOperand(1))) + return IC_User; + break; + default: + // For anything else, check all the operands. + // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. + for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (IsPotentialRetainableObjPtr(*OI)) + return IC_User; + } + } + + // Otherwise, it's totally inert for ARC purposes. + return IC_None; +} + +/// \brief Determine which objc runtime call instruction class V belongs to. +/// +/// This is similar to GetInstructionClass except that it only detects objc +/// runtime calls. This allows it to be faster. +/// +static InstructionClass GetBasicInstructionClass(const Value *V) { + if (const CallInst *CI = dyn_cast(V)) { + if (const Function *F = CI->getCalledFunction()) + return GetFunctionClass(F); + // Otherwise, be conservative. 
+ return IC_CallOrUser; + } + + // Otherwise, be conservative. + return isa(V) ? IC_CallOrUser : IC_User; +} + +/// \brief Test if the given class is objc_retain or equivalent. +static bool IsRetain(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV; +} + +/// \brief Test if the given class is objc_autorelease or equivalent. +static bool IsAutorelease(InstructionClass Class) { + return Class == IC_Autorelease || + Class == IC_AutoreleaseRV; +} + +/// \brief Test if the given class represents instructions which return their +/// argument verbatim. +static bool IsForwarding(InstructionClass Class) { + // objc_retainBlock technically doesn't always return its argument + // verbatim, but it doesn't matter for our purposes here. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock || + Class == IC_NoopCast; +} + +/// \brief Test if the given class represents instructions which do nothing if +/// passed a null pointer. +static bool IsNoopOnNull(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock; +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. +static bool IsAlwaysTail(InstructionClass Class) { + // IC_RetainBlock may be given a stack argument. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_AutoreleaseRV; +} + +/// \brief Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. +static bool IsNeverTail(InstructionClass Class) { + /// It is never safe to tail call objc_autorelease since by tail calling + /// objc_autorelease, we also tail call -[NSObject autorelease] which supports + /// fast autoreleasing causing our object to be potentially reclaimed from the + /// autorelease pool which violates the semantics of __autoreleasing types in + /// ARC. + return Class == IC_Autorelease; +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. +static bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_AutoreleasepoolPush || + Class == IC_AutoreleasepoolPop; +} + +/// \brief Erase the given instruction. +/// +/// Many ObjC calls return their argument verbatim, +/// so if it's such a call and the return value has users, replace them with the +/// argument value. +/// +static void EraseInstruction(Instruction *CI) { + Value *OldArg = cast(CI)->getArgOperand(0); + + bool Unused = CI->use_empty(); + + if (!Unused) { + // Replace the return value with the argument. + assert(IsForwarding(GetBasicInstructionClass(CI)) && + "Can't delete non-forwarding instruction with users!"); + CI->replaceAllUsesWith(OldArg); + } + + CI->eraseFromParent(); + + if (Unused) + RecursivelyDeleteTriviallyDeadInstructions(OldArg); +} + +/// \brief This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. 
+static const Value *GetUnderlyingObjCPtr(const Value *V) { + for (;;) { + V = GetUnderlyingObject(V); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast(V)->getArgOperand(0); + } + + return V; +} + +/// \brief This is a wrapper around Value::stripPointerCasts which also knows +/// how to look through objc_retain and objc_autorelease calls, which we know to +/// return their argument verbatim. +static const Value *StripPointerCastsAndObjCCalls(const Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast(V)->getArgOperand(0); + } + return V; +} + +/// \brief This is a wrapper around Value::stripPointerCasts which also knows +/// how to look through objc_retain and objc_autorelease calls, which we know to +/// return their argument verbatim. +static Value *StripPointerCastsAndObjCCalls(Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast(V)->getArgOperand(0); + } + return V; +} + +/// \brief Assuming the given instruction is one of the special calls such as +/// objc_retain or objc_release, return the argument value, stripped of no-op +/// casts and forwarding calls. +static Value *GetObjCArg(Value *Inst) { + return StripPointerCastsAndObjCCalls(cast(Inst)->getArgOperand(0)); +} + +/// \brief Return true if this value refers to a distinct and identifiable +/// object. +/// +/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses +/// special knowledge of ObjC conventions. +static bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". + // Constants (including GlobalVariables) and Allocas are never + // reference-counted. + if (isa(V) || isa(V) || + isa(V) || isa(V) || + isa(V)) + return true; + + if (const LoadInst *LI = dyn_cast(V)) { + const Value *Pointer = + StripPointerCastsAndObjCCalls(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || + Name.startswith("\01L_OBJC_METH_VAR_NAME_") || + Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + } + } + + return false; +} + +/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon +/// as it finds a value with multiple uses. +static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { + if (Arg->hasOneUse()) { + if (const BitCastInst *BC = dyn_cast(Arg)) + return FindSingleUseIdentifiedObject(BC->getOperand(0)); + if (const GetElementPtrInst *GEP = dyn_cast(Arg)) + if (GEP->hasAllZeroIndices()) + return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); + if (IsForwarding(GetBasicInstructionClass(Arg))) + return FindSingleUseIdentifiedObject( + cast(Arg)->getArgOperand(0)); + if (!IsObjCIdentifiedObject(Arg)) + return 0; + return Arg; + } + + // If we found an identifiable object but it has multiple uses, but they are + // trivial uses, we can still consider this to be a single-use value. 
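
A brief usage sketch of the stripping helpers defined just above (a hypothetical fragment, not part of the patch; it assumes some Instruction *Inst and the file-local helpers are in scope):

  if (GetBasicInstructionClass(Inst) == IC_Release) {
    // For objc_release(bitcast(objc_autorelease(%p))), GetObjCArg strips the
    // no-op bitcast and the forwarding autorelease call and yields %p itself,
    // which is the pointer the release semantically operates on.
    Value *Arg = GetObjCArg(Inst);
    (void)Arg; // e.g. key a map of release sites by underlying pointer
  }

FindSingleUseIdentifiedObject then resumes with the multi-use case:
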
+ if (IsObjCIdentifiedObject(Arg)) { + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) + return 0; + } + + return Arg; + } + + return 0; +} + +/// \brief Test if the given module looks interesting to run ARC optimization +/// on. +static bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer"); +} + +/// \brief Test whether the given pointer, which is an Objective C block +/// pointer, does not "escape". +/// +/// This differs from regular escape analysis in that a use as an +/// argument to a call is not considered an escape. +/// +static bool DoesObjCBlockEscape(const Value *BlockPtr) { + + DEBUG(dbgs() << "DoesObjCBlockEscape: Target: " << *BlockPtr << "\n"); + + // Walk the def-use chains. + SmallVector Worklist; + Worklist.push_back(BlockPtr); + + // Ensure we do not visit any value twice. + SmallPtrSet VisitedSet; + + do { + const Value *V = Worklist.pop_back_val(); + + DEBUG(dbgs() << "DoesObjCBlockEscape: Visiting: " << *V << "\n"); + + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const User *UUser = *UI; + + DEBUG(dbgs() << "DoesObjCBlockEscape: User: " << *UUser << "\n"); + + // Special - Use by a call (callee or argument) is not considered + // to be an escape. + switch (GetBasicInstructionClass(UUser)) { + case IC_StoreWeak: + case IC_InitWeak: + case IC_StoreStrong: + case IC_Autorelease: + case IC_AutoreleaseRV: { + DEBUG(dbgs() << "DoesObjCBlockEscape: User copies pointer arguments. " + "Block Escapes!\n"); + // These special functions make copies of their pointer arguments. + return true; + } + case IC_User: + case IC_None: + // Use by an instruction which copies the value is an escape if the + // result is an escape. + if (isa(UUser) || isa(UUser) || + isa(UUser) || isa(UUser)) { + + if (!VisitedSet.insert(UUser)) { + DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes " + "if result escapes. Adding to list.\n"); + Worklist.push_back(UUser); + } else { + DEBUG(dbgs() << "DoesObjCBlockEscape: Already visited node.\n"); + } + continue; + } + // Use by a load is not an escape. + if (isa(UUser)) + continue; + // Use by a store is not an escape if the use is the address. + if (const StoreInst *SI = dyn_cast(UUser)) + if (V != SI->getValueOperand()) + continue; + break; + default: + // Regular calls and other stuff are not considered escapes. + continue; + } + // Otherwise, conservatively assume an escape. + DEBUG(dbgs() << "DoesObjCBlockEscape: Assuming block escapes.\n"); + return true; + } + } while (!Worklist.empty()); + + // No escapes found. 
+ DEBUG(dbgs() << "DoesObjCBlockEscape: Block does not escape.\n"); + return false; +} + +/// @} +/// +/// \defgroup ARCAA Extends alias analysis using ObjC specific knowledge. +/// @{ + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" + +namespace { + /// \brief This is a simple alias analysis implementation that uses knowledge + /// of ARC constructs to answer queries. + /// + /// TODO: This class could be generalized to know about other ObjC-specific + /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing + /// even though their offsets are dynamic. + class ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + ObjCARCAliasAnalysis() : ImmutablePass(ID) { + initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + private: + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// This method is used when a pass implements an analysis interface through + /// multiple inheritance. If needed, it should override this to adjust the + /// this pointer as needed for the specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return static_cast(this); + return this; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; +} // End of anonymous namespace + +// Register this pass... +char ObjCARCAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { + return new ObjCARCAliasAnalysis(); +} + +void +ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { + if (!EnableARCOpts) + return AliasAnalysis::alias(LocA, LocB); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making a + // precise alias query. + const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); + const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); + AliasResult Result = + AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), + Location(SB, LocB.Size, LocB.TBAATag)); + if (Result != MayAlias) + return Result; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *UA = GetUnderlyingObjCPtr(SA); + const Value *UB = GetUnderlyingObjCPtr(SB); + if (UA != SA || UB != SB) { + Result = AliasAnalysis::alias(Location(UA), Location(UB)); + // We can't use MustAlias or PartialAlias results here because + // GetUnderlyingObjCPtr may return an offsetted pointer value. + if (Result == NoAlias) + return NoAlias; + } + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. 
+ return MayAlias; +} + +bool +ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); + if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), + OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) + return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // We have nothing to do. Just chain to the next AliasAnalysis. + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case IC_NoopCast: + return DoesNotAccessMemory; + default: + break; + } + + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefInfo(CS, Loc); + + switch (GetBasicInstructionClass(CS.getInstruction())) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_NoopCast: + case IC_AutoreleasepoolPush: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, because it updates + // pointers when it copies block data. + return NoModRef; + default: + break; + } + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // TODO: Theoretically we could check for dependencies between objc_* calls + // and OnlyAccessesArgumentPointees calls or other well-behaved calls. + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + +/// @} +/// +/// \defgroup ARCExpansion Early ARC Optimizations. +/// @{ + +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/ObjCARC.h" + +namespace { + /// \brief Early ARC transformations. + class ObjCARCExpand : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + /// A flag indicating whether this optimization pass should run. 
+ bool Run; + + public: + static char ID; + ObjCARCExpand() : FunctionPass(ID) { + initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCExpand::ID = 0; +INITIALIZE_PASS(ObjCARCExpand, + "objc-arc-expand", "ObjC ARC expansion", false, false) + +Pass *llvm::createObjCARCExpandPass() { + return new ObjCARCExpand(); +} + +void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +bool ObjCARCExpand::doInitialization(Module &M) { + Run = ModuleHasARC(M); + return false; +} + +bool ObjCARCExpand::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + bool Changed = false; + + DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n"); + + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + Instruction *Inst = &*I; + + DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n"); + + switch (GetBasicInstructionClass(Inst)) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: { + // These calls return their argument verbatim, as a low-level + // optimization. However, this makes high-level optimizations + // harder. Undo any uses of this optimization that the front-end + // emitted here. We'll redo them in the contract pass. + Changed = true; + Value *Value = cast(Inst)->getArgOperand(0); + DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n" + " New = " << *Value << "\n"); + Inst->replaceAllUsesWith(Value); + break; + } + default: + break; + } + } + + DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n"); + + return Changed; +} + +/// @} +/// +/// \defgroup ARCAPElim ARC Autorelease Pool Elimination. +/// @{ + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" + +namespace { + /// \brief Autorelease pool elimination. + class ObjCARCAPElim : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + + static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); + static bool OptimizeBB(BasicBlock *BB); + + public: + static char ID; + ObjCARCAPElim() : ModulePass(ID) { + initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCAPElim::ID = 0; +INITIALIZE_PASS(ObjCARCAPElim, + "objc-arc-apelim", + "ObjC ARC autorelease pool elimination", + false, false) + +Pass *llvm::createObjCARCAPElimPass() { + return new ObjCARCAPElim(); +} + +void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +/// Interprocedurally determine if calls made by the given call site can +/// possibly produce autoreleases. +bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { + if (const Function *Callee = CS.getCalledFunction()) { + if (Callee->isDeclaration() || Callee->mayBeOverridden()) + return true; + for (Function::const_iterator I = Callee->begin(), E = Callee->end(); + I != E; ++I) { + const BasicBlock *BB = I; + for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); + J != F; ++J) + if (ImmutableCallSite JCS = ImmutableCallSite(J)) + // This recursion depth limit is arbitrary. It's just great + // enough to cover known interesting testcases. 
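
The guard just below cuts the interprocedural walk off at a small fixed depth: unknown callees are conservatively assumed to autorelease, while known call chains are only followed three levels deep. The same shape in a self-contained toy (all names invented here, not part of the patch):

  #include <vector>

  struct Fn {
    bool BodyVisible;               // analogous to !isDeclaration()
    std::vector<const Fn *> Calls;  // call sites in the body
  };

  static bool MayDoX(const Fn *F, unsigned Depth = 0) {
    if (!F->BodyVisible)
      return true;                  // can't see the body: assume the worst
    for (size_t i = 0, e = F->Calls.size(); i != e; ++i)
      if (Depth < 3 && MayDoX(F->Calls[i], Depth + 1))
        return true;
    return false;
  }

The corresponding depth check in MayAutorelease follows:
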
+ if (Depth < 3 &&
+ !JCS.onlyReadsMemory() &&
+ MayAutorelease(JCS, Depth + 1))
+ return true;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+ bool Changed = false;
+
+ Instruction *Push = 0;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPush:
+ Push = Inst;
+ break;
+ case IC_AutoreleasepoolPop:
+ // If this pop matches a push and nothing in between can autorelease,
+ // zap the pair.
+ if (Push && cast(Inst)->getArgOperand(0) == Push) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
+ "autorelease pair:\n"
+ " Pop: " << *Inst << "\n"
+ << " Push: " << *Push << "\n");
+ Inst->eraseFromParent();
+ Push->eraseFromParent();
+ }
+ Push = 0;
+ break;
+ case IC_CallOrUser:
+ if (MayAutorelease(ImmutableCallSite(Inst)))
+ Push = 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!ModuleHasARC(M))
+ return false;
+
+ // Find the llvm.global_ctors variable, as the first step in
+ // identifying the global constructors. In theory, unnecessary autorelease
+ // pools could occur anywhere, but in practice it's pretty rare. Global
+ // ctors are a place where autorelease pools get inserted automatically,
+ // so it's pretty common for them to be unnecessary, and it's pretty
+ // profitable to eliminate them.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return false;
+
+ assert(GV->hasDefinitiveInitializer() &&
+ "llvm.global_ctors is uncooperative!");
+
+ bool Changed = false;
+
+ // Dig the constructor functions out of GV's initializer.
+ ConstantArray *Init = cast(GV->getInitializer());
+ for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ // llvm.global_ctors is an array of pairs where the second members
+ // are constructor functions.
+ Function *F = dyn_cast(cast(Op)->getOperand(1));
+ // If the user used a constructor function with the wrong signature and
+ // it got bitcasted or whatever, look the other way.
+ if (!F)
+ continue;
+ // Only look at function definitions.
+ if (F->isDeclaration())
+ continue;
+ // Only look at functions with one basic block.
+ if (llvm::next(F->begin()) != F->end())
+ continue;
+ // Ok, a single-block constructor function definition. Try to optimize it.
+ Changed |= OptimizeBB(F->begin());
+ }
+
+ return Changed;
+}
+
+/// @}
+///
+/// \defgroup ARCOpt ARC Optimization.
+/// @{
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be Interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CFG.h"
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// \brief This is similar to BasicAliasAnalysis, and it uses many of the same
+ /// techniques, except it uses special ObjC-specific reasoning about pointer
+ /// relationships.
+ ///
+ /// In this context ``Provenance'' is defined as the history of an object's
+ /// ownership. Thus ``Provenance Analysis'' is defined by using the notion of
+ /// an ``independent provenance source'' of a pointer to determine whether or
+ /// not two pointers have the same provenance source and thus could
+ /// potentially be related.
+ class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair ValuePairTy;
+ typedef DenseMap CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+ ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+
+ public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+ };
+}
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast(B))
+ if (A->getCondition() == SB->getCondition())
+ return related(A->getTrueValue(), SB->getTrueValue()) ||
+ related(A->getFalseValue(), SB->getFalseValue());
+
+ // Check both arms of the Select node individually.
+ return related(A->getTrueValue(), B) ||
+ related(A->getFalseValue(), B);
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+ // If the values are PHIs in the same block, we can do a more precise and
+ // more efficient check: just check for relations between the values on
+ // corresponding edges.
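
Why checking corresponding arms (or edges) is sound: two selects over one condition can never mix arms at run time, and two PHIs in one block always take the same incoming edge together. In miniature, as a toy (not part of the patch):

  int select_pair_demo(bool c, int a1, int b1, int a2, int b2) {
    int r1 = c ? a1 : b1;
    int r2 = c ? a2 : b2;
    // Whatever c is, (r1, r2) is either (a1, a2) or (b1, b2); the cross
    // pairings (a1, b2) and (b1, a2) are impossible, so only corresponding
    // arms ever need to be checked for relatedness.
    return r1 + r2;
  }

relatedPHI's same-block fast path below applies the identical reasoning edge-wise:
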
+ if (const PHINode *PNB = dyn_cast(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the arms checked out.
+ return false;
+}
+
+/// Test if the value of P, or any value covered by its provenance, is ever
+/// stored within the function (not counting callees).
+static bool isStoredObjCPointer(const Value *P) {
+ SmallPtrSet Visited;
+ SmallVector Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointee is stored through.
+ continue;
+ }
+ if (isa(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa(B))
+ return isStoredObjCPointer(A);
+ if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa(A))
+ return isStoredObjCPointer(B);
+ // Both pointers are identified and escapes aren't an evident problem.
+ return false;
+ }
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa(A))
+ return isStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
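
The comment above describes a memoization trick worth calling out: the cache is seeded with the conservative answer before recursing, so a cyclic query (A asks about B, which asks about A again) terminates with "related" instead of looping. A minimal self-contained version of the idiom, on toy integer keys rather than Values:

  #include <algorithm>
  #include <map>
  #include <utility>

  typedef std::pair<int, int> KeyT;

  static bool relatedToy(int A, int B, std::map<KeyT, bool> &Memo) {
    if (A > B) std::swap(A, B);                        // canonicalize the query
    std::pair<std::map<KeyT, bool>::iterator, bool> P =
        Memo.insert(std::make_pair(KeyT(A, B), true)); // seed: assume related
    if (!P.second)
      return P.first->second;  // a cached result, or an in-flight cyclic query
    bool Result = (A == B);    // stand-in for the real (possibly recursive) check
    P.first->second = Result;  // overwrite the seed with the real answer
    return Result;
  }

related() below implements exactly this, with relatedCheck() as the real check:
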
+ if (A > B) std::swap(A, B);
+ std::pair Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
+
+namespace {
+ /// \enum Sequence
+ ///
+ /// \brief A sequence of states that a pointer may go through in which an
+ /// objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x)
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
+ S_Use, ///< any use of x
+ S_Stop, ///< like S_Release, but code motion is stopped
+ S_Release, ///< objc_release(x)
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release
+ };
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
+
+namespace {
+ /// \brief Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the same known pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
+
+ /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
+ /// calls).
+ bool IsRetainBlock;
+
+ /// True if the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet ReverseInsertPts;
+
+ RRInfo() :
+ KnownSafe(false), IsRetainBlock(false),
+ IsTailCallRelease(false),
+ ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownSafe = false;
+ IsRetainBlock = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// \brief This class summarizes several per-pointer runtime properties which
+ /// are propagated through the flow graph.
+ class PtrState {
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True if we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ Sequence Seq : 8;
+
+ public:
+ /// Unidirectional information about the current sequence.
+ ///
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
+ Seq(S_None) {}
+
+ void SetKnownPositiveRefCount() {
+ KnownPositiveRefCount = true;
+ }
+
+ void ClearRefCount() {
+ KnownPositiveRefCount = false;
+ }
+
+ bool IsKnownIncremented() const {
+ return KnownPositiveRefCount;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ ResetSequenceProgress(S_None);
+ }
+
+ void ResetSequenceProgress(Sequence NewSeq) {
+ Seq = NewSeq;
+ Partial = false;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+
+ // We can't merge a plain objc_retain with an objc_retainBlock.
+ if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+ Seq = S_None;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merge differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+ Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ Partial |= RRI.ReverseInsertPts.insert(*I);
+ }
+}
+
+namespace {
+ /// \brief Per-BasicBlock state.
+ class BBState {
+ /// The number of unique control paths from the entry which can reach this
+ /// block.
+ unsigned TopDownPathCount;
+
+ /// The number of unique control paths to exits from this block.
+ unsigned BottomUpPathCount;
+
+ /// A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector MapTy;
+
+ /// The top-down traversal uses this to record information known about a
+ /// pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// The bottom-up traversal uses this to record information known about a
+ /// pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ /// Effective predecessors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector Preds;
+ /// Effective successors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector Succs; + + public: + BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + + typedef MapTy::iterator ptr_iterator; + typedef MapTy::const_iterator ptr_const_iterator; + + ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + ptr_const_iterator top_down_ptr_begin() const { + return PerPtrTopDown.begin(); + } + ptr_const_iterator top_down_ptr_end() const { + return PerPtrTopDown.end(); + } + + ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } + ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + ptr_const_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + ptr_const_iterator bottom_up_ptr_end() const { + return PerPtrBottomUp.end(); + } + + /// Mark this block as being an entry block, which has one path from the + /// entry by definition. + void SetAsEntry() { TopDownPathCount = 1; } + + /// Mark this block as being an exit block, which has one path to an exit by + /// definition. + void SetAsExit() { BottomUpPathCount = 1; } + + PtrState &getPtrTopDownState(const Value *Arg) { + return PerPtrTopDown[Arg]; + } + + PtrState &getPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp[Arg]; + } + + void clearBottomUpPointers() { + PerPtrBottomUp.clear(); + } + + void clearTopDownPointers() { + PerPtrTopDown.clear(); + } + + void InitFromPred(const BBState &Other); + void InitFromSucc(const BBState &Other); + void MergePred(const BBState &Other); + void MergeSucc(const BBState &Other); + + /// Return the number of possible unique paths from an entry to an exit + /// which pass through this block. This is only valid after both the + /// top-down and bottom-up traversals are complete. + unsigned GetAllPathCount() const { + assert(TopDownPathCount != 0); + assert(BottomUpPathCount != 0); + return TopDownPathCount * BottomUpPathCount; + } + + // Specialized CFG utilities. + typedef SmallVectorImpl::const_iterator edge_iterator; + edge_iterator pred_begin() { return Preds.begin(); } + edge_iterator pred_end() { return Preds.end(); } + edge_iterator succ_begin() { return Succs.begin(); } + edge_iterator succ_end() { return Succs.end(); } + + void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } + void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } + + bool isExit() const { return Succs.empty(); } + }; +} + +void BBState::InitFromPred(const BBState &Other) { + PerPtrTopDown = Other.PerPtrTopDown; + TopDownPathCount = Other.TopDownPathCount; +} + +void BBState::InitFromSucc(const BBState &Other) { + PerPtrBottomUp = Other.PerPtrBottomUp; + BottomUpPathCount = Other.BottomUpPathCount; +} + +/// The top-down traversal uses this to merge information about predecessors to +/// form the initial state for a new block. +void BBState::MergePred(const BBState &Other) { + // Other.TopDownPathCount can be 0, in which case it is either dead or a + // loop backedge. Loop backedges are special. + TopDownPathCount += Other.TopDownPathCount; + + // Check for overflow. If we have overflow, fall back to conservative + // behavior. + if (TopDownPathCount < Other.TopDownPathCount) { + clearTopDownPointers(); + return; + } + + // For each entry in the other set, if our set has an entry with the same key, + // merge the entries. Otherwise, copy the entry and merge it with an empty + // entry. 
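
A small aside on the overflow check a few lines up: for unsigned arithmetic, wraparound after an addition is detectable by the result being smaller than the addend. The idiom in isolation (a sketch; the helper name is invented here):

  static bool addAndCheckNoWrap(unsigned &Acc, unsigned Delta) {
    Acc += Delta;        // unsigned wraparound is well-defined in C++
    return Acc >= Delta; // false exactly when the addition wrapped
  }

MergePred and MergeSucc react to a wrap by clearing the per-pointer maps, falling back to "know nothing" rather than trusting a bogus path count. The entry-merging loops that the comment above describes follow:
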
+  for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+       ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+    std::pair Pair = PerPtrTopDown.insert(*MI);
+    Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+                             /*TopDown=*/true);
+  }
+
+  // For each entry in our set, if the other set doesn't have an entry with the
+  // same key, force it to merge with an empty entry.
+  for (ptr_iterator MI = top_down_ptr_begin(),
+       ME = top_down_ptr_end(); MI != ME; ++MI)
+    if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+      MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// The bottom-up traversal uses this to merge information about successors to
+/// form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  BottomUpPathCount += Other.BottomUpPathCount;
+
+  // Check for overflow. If we have overflow, fall back to conservative
+  // behavior.
+  if (BottomUpPathCount < Other.BottomUpPathCount) {
+    clearBottomUpPointers();
+    return;
+  }
+
+  // For each entry in the other set, if our set has an entry with the
+  // same key, merge the entries. Otherwise, copy the entry and merge
+  // it with an empty entry.
+  for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+       ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+    std::pair Pair = PerPtrBottomUp.insert(*MI);
+    Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+                             /*TopDown=*/false);
+  }
+
+  // For each entry in our set, if the other set doesn't have an entry
+  // with the same key, force it to merge with an empty entry.
+  for (ptr_iterator MI = bottom_up_ptr_begin(),
+       ME = bottom_up_ptr_end(); MI != ME; ++MI)
+    if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+      MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+  /// \brief The main ARC optimization pass.
+  class ObjCARCOpt : public FunctionPass {
+    bool Changed;
+    ProvenanceAnalysis PA;
+
+    /// A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// Declarations for ObjC runtime functions, for use in creating calls to
+    /// them. These are initialized lazily to avoid cluttering up the Module
+    /// with unused declarations.
+
+    /// Declaration for ObjC runtime function
+    /// objc_retainAutoreleasedReturnValue.
+    Constant *RetainRVCallee;
+    /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
+    Constant *AutoreleaseRVCallee;
+    /// Declaration for ObjC runtime function objc_release.
+    Constant *ReleaseCallee;
+    /// Declaration for ObjC runtime function objc_retain.
+    Constant *RetainCallee;
+    /// Declaration for ObjC runtime function objc_retainBlock.
+    Constant *RetainBlockCallee;
+    /// Declaration for ObjC runtime function objc_autorelease.
+    Constant *AutoreleaseCallee;
+
+    /// Flags which determine whether each of the interesting runtime functions
+    /// is in fact used in the current function.
+    unsigned UsedInThisFunction;
+
+    /// The Metadata Kind for clang.imprecise_release metadata.
+    unsigned ImpreciseReleaseMDKind;
+
+    /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+    unsigned CopyOnEscapeMDKind;
+
+    /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
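[Illustrative aside, not part of the patch: the "TopDownPathCount < Other.TopDownPathCount" and "BottomUpPathCount < Other.BottomUpPathCount" tests in MergePred/MergeSucc above are the standard unsigned-wraparound check. A minimal sketch:]

    // After A += B on unsigned values, A < B holds exactly when the addition
    // wrapped around; the merge functions use this to detect path-count
    // overflow and then clear the per-pointer state rather than trust a
    // bogus count.
    #include <cassert>
    #include <limits>

    int main() {
      unsigned Count = std::numeric_limits<unsigned>::max() - 1;
      unsigned Other = 3;
      Count += Other;        // wraps modulo 2^32
      assert(Count < Other); // overflow detected -> conservative fallback
      return 0;
    }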
+ unsigned NoObjCARCExceptionsMDKind; + + Constant *getRetainRVCallee(Module *M); + Constant *getAutoreleaseRVCallee(Module *M); + Constant *getReleaseCallee(Module *M); + Constant *getRetainCallee(Module *M); + Constant *getRetainBlockCallee(Module *M); + Constant *getAutoreleaseCallee(Module *M); + + bool IsRetainBlockOptimizable(const Instruction *Inst); + + void OptimizeRetainCall(Function &F, Instruction *Retain); + bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); + void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, + InstructionClass &Class); + void OptimizeIndividualCalls(Function &F); + + void CheckForCFGHazards(const BasicBlock *BB, + DenseMap &BBStates, + BBState &MyStates) const; + bool VisitInstructionBottomUp(Instruction *Inst, + BasicBlock *BB, + MapVector &Retains, + BBState &MyStates); + bool VisitBottomUp(BasicBlock *BB, + DenseMap &BBStates, + MapVector &Retains); + bool VisitInstructionTopDown(Instruction *Inst, + DenseMap &Releases, + BBState &MyStates); + bool VisitTopDown(BasicBlock *BB, + DenseMap &BBStates, + DenseMap &Releases); + bool Visit(Function &F, + DenseMap &BBStates, + MapVector &Retains, + DenseMap &Releases); + + void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, + MapVector &Retains, + DenseMap &Releases, + SmallVectorImpl &DeadInsts, + Module *M); + + bool ConnectTDBUTraversals(DenseMap &BBStates, + MapVector &Retains, + DenseMap &Releases, + Module *M, + SmallVector &NewRetains, + SmallVector &NewReleases, + SmallVector &DeadInsts, + RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, + Value *Arg, + bool KnownSafe, + bool &AnyPairsCompletelyEliminated); + + bool PerformCodePlacement(DenseMap &BBStates, + MapVector &Retains, + DenseMap &Releases, + Module *M); + + void OptimizeWeakCalls(Function &F); + + bool OptimizeSequences(Function &F); + + void OptimizeReturns(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + virtual void releaseMemory(); + + public: + static char ID; + ObjCARCOpt() : FunctionPass(ID) { + initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCOpt::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCOpt, + "objc-arc", "ObjC ARC optimization", false, false) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) +INITIALIZE_PASS_END(ObjCARCOpt, + "objc-arc", "ObjC ARC optimization", false, false) + +Pass *llvm::createObjCARCOptPass() { + return new ObjCARCOpt(); +} + +void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + // ARC optimization doesn't currently split critical edges. + AU.setPreservesCFG(); +} + +bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { + // Without the magic metadata tag, we have to assume this might be an + // objc_retainBlock call inserted to convert a block pointer to an id, + // in which case it really is needed. + if (!Inst->getMetadata(CopyOnEscapeMDKind)) + return false; + + // If the pointer "escapes" (not including being used in a call), + // the copy may be needed. + if (DoesObjCBlockEscape(Inst)) + return false; + + // Otherwise, it's not needed. 
+  return true;
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+  if (!RetainRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttributeSet Attribute =
+      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+                                  Attribute::NoUnwind);
+    RetainRVCallee =
+      M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+                             Attribute);
+  }
+  return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+  if (!AutoreleaseRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    Type *Params[] = { I8X };
+    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttributeSet Attribute =
+      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+                                  Attribute::NoUnwind);
+    AutoreleaseRVCallee =
+      M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+                             Attribute);
+  }
+  return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+  if (!ReleaseCallee) {
+    LLVMContext &C = M->getContext();
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttributeSet Attribute =
+      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+                                  Attribute::NoUnwind);
+    ReleaseCallee =
+      M->getOrInsertFunction(
+        "objc_release",
+        FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+        Attribute);
+  }
+  return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+  if (!RetainCallee) {
+    LLVMContext &C = M->getContext();
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttributeSet Attribute =
+      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+                                  Attribute::NoUnwind);
+    RetainCallee =
+      M->getOrInsertFunction(
+        "objc_retain",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attribute);
+  }
+  return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+  if (!RetainBlockCallee) {
+    LLVMContext &C = M->getContext();
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    // objc_retainBlock is not nounwind because it calls user copy constructors
+    // which could theoretically throw.
+    RetainBlockCallee =
+      M->getOrInsertFunction(
+        "objc_retainBlock",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        AttributeSet());
+  }
+  return RetainBlockCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+  if (!AutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttributeSet Attribute =
+      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+                                  Attribute::NoUnwind);
+    AutoreleaseCallee =
+      M->getOrInsertFunction(
+        "objc_autorelease",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attribute);
+  }
+  return AutoreleaseCallee;
+}
+
+/// Test whether the given value is possibly a reference-counted pointer,
+/// including tests which utilize AliasAnalysis.
+static bool IsPotentialRetainableObjPtr(const Value *Op, AliasAnalysis &AA) {
+  // First make the rudimentary check.
+  if (!IsPotentialRetainableObjPtr(Op))
+    return false;
+
+  // Objects in constant memory are not reference-counted.
+  if (AA.pointsToConstantMemory(Op))
+    return false;
+
+  // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + +/// Test whether the given instruction can result in a reference count +/// modification (positive or negative) for the pointer's object. +static bool +CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class) { + switch (Class) { + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_User: + // These operations never directly modify a reference count. + return false; + default: break; + } + + ImmutableCallSite CS = static_cast(Inst); + assert(CS && "Only calls can alter reference counts!"); + + // See if AliasAnalysis can help us with the call. + AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + if (AliasAnalysis::onlyReadsMemory(MRB)) + return false; + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + const Value *Op = *I; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; + } + + // Assume the worst. + return true; +} + +/// Test whether the given instruction can "use" the given pointer's object in a +/// way that requires the reference count to be positive. +static bool +CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, + InstructionClass Class) { + // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. + if (Class == IC_Call) + return false; + + // Consider various instructions which may have pointer arguments which are + // not "uses". + if (const ICmpInst *ICI = dyn_cast(Inst)) { + // Comparing a pointer with null, or any other constant, isn't really a use, + // because we don't care what the pointer points to, or about the values + // of any other dynamic reference-counted pointers. + if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) + return false; + } else if (ImmutableCallSite CS = static_cast(Inst)) { + // For calls, just check the arguments (and not the callee operand). + for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), + OE = CS.arg_end(); OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; + } else if (const StoreInst *SI = dyn_cast(Inst)) { + // Special-case stores, because we don't care about the stored value, just + // the store address. + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + // If we can't tell what the underlying object was, assume there is a + // dependence. + return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); + } + + // Check each operand for a match. + for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; +} + +/// Test whether the given instruction can autorelease any pointer or cause an +/// autoreleasepool pop. 
+static bool +CanInterruptRV(InstructionClass Class) { + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_CallOrUser: + case IC_Call: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + return true; + default: + return false; + } +} + +namespace { + /// \enum DependenceKind + /// \brief Defines different dependence kinds among various ARC constructs. + /// + /// There are several kinds of dependence-like concepts in use here. + /// + enum DependenceKind { + NeedsPositiveRetainCount, + AutoreleasePoolBoundary, + CanChangeRetainCount, + RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. + RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. + RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. + }; +} + +/// Test if there can be dependencies on Inst through Arg. This function only +/// tests dependencies relevant for removing pairs of calls. +static bool +Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, + ProvenanceAnalysis &PA) { + // If we've reached the definition of Arg, stop. + if (Inst == Arg) + return true; + + switch (Flavor) { + case NeedsPositiveRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanUse(Inst, Arg, PA, Class); + } + } + + case AutoreleasePoolBoundary: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // These mark the end and begin of an autorelease pool scope. + return true; + default: + // Nothing else does this. + return false; + } + } + + case CanChangeRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + // Conservatively assume this can decrement any count. + return true; + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanAlterRefCount(Inst, Arg, PA, Class); + } + } + + case RetainAutoreleaseDep: + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // Don't merge an objc_autorelease with an objc_retain inside a different + // autoreleasepool scope. + return true; + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Nothing else matters for objc_retainAutorelease formation. + return false; + } + + case RetainAutoreleaseRVDep: { + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Anything that can autorelease interrupts + // retainAutoreleaseReturnValue formation. + return CanInterruptRV(Class); + } + } + + case RetainRVDep: + return CanInterruptRV(GetBasicInstructionClass(Inst)); + } + + llvm_unreachable("Invalid dependence flavor"); +} + +/// Walk up the CFG from StartPos (which is in StartBB) and find local and +/// non-local dependencies on Arg. +/// +/// TODO: Cache results? 
+static void
+FindDependencies(DependenceKind Flavor,
+                 const Value *Arg,
+                 BasicBlock *StartBB, Instruction *StartInst,
+                 SmallPtrSet &DependingInstructions,
+                 SmallPtrSet &Visited,
+                 ProvenanceAnalysis &PA) {
+  BasicBlock::iterator StartPos = StartInst;
+
+  SmallVector, 4> Worklist;
+  Worklist.push_back(std::make_pair(StartBB, StartPos));
+  do {
+    std::pair Pair =
+      Worklist.pop_back_val();
+    BasicBlock *LocalStartBB = Pair.first;
+    BasicBlock::iterator LocalStartPos = Pair.second;
+    BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+    for (;;) {
+      if (LocalStartPos == StartBBBegin) {
+        pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+        if (PI == PE)
+          // If we've reached the function entry, produce a null dependence.
+          DependingInstructions.insert(0);
+        else
+          // Add the predecessors to the worklist.
+          do {
+            BasicBlock *PredBB = *PI;
+            if (Visited.insert(PredBB))
+              Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+          } while (++PI != PE);
+        break;
+      }
+
+      Instruction *Inst = --LocalStartPos;
+      if (Depends(Flavor, Inst, Arg, PA)) {
+        DependingInstructions.insert(Inst);
+        break;
+      }
+    }
+  } while (!Worklist.empty());
+
+  // Determine whether the original StartBB post-dominates all of the blocks we
+  // visited. If not, insert a sentinel indicating that most optimizations are
+  // not safe.
+  for (SmallPtrSet::const_iterator I = Visited.begin(),
+       E = Visited.end(); I != E; ++I) {
+    const BasicBlock *BB = *I;
+    if (BB == StartBB)
+      continue;
+    const TerminatorInst *TI = cast(&BB->back());
+    for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+      const BasicBlock *Succ = *SI;
+      if (Succ != StartBB && !Visited.count(Succ)) {
+        DependingInstructions.insert(reinterpret_cast(-1));
+        return;
+      }
+    }
+  }
+}
+
+static bool isNullOrUndef(const Value *V) {
+  return isa(V) || isa(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+  return isa(I) ||
+         (isa(I) &&
+          cast(I)->hasAllZeroIndices());
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  ImmutableCallSite CS(GetObjCArg(Retain));
+  const Instruction *Call = CS.getInstruction();
+  if (!Call) return;
+  if (Call->getParent() != Retain->getParent()) return;
+
+  // Check that the call is next to the retain.
+  BasicBlock::const_iterator I = Call;
+  ++I;
+  while (isNoopInstruction(I)) ++I;
+  if (&*I != Retain)
+    return;
+
+  // Turn it into an objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
+                  "objc_retain => objc_retainAutoreleasedReturnValue"
+                  " since the operand is a return value.\n"
+                  " Old: "
+               << *Retain << "\n");
+
+  cast(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+  DEBUG(dbgs() << " New: "
+               << *Retain << "\n");
+}
+
+/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
+/// not a return value. Or, if it can be paired with an
+/// objc_autoreleaseReturnValue, delete the pair and return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+  // Check for the argument being from an immediately preceding call or invoke.
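[Illustrative aside, not part of the patch: FindDependencies above reports two out-of-band conditions through sentinel pointers rather than a separate flag. A minimal sketch of the convention, with a toy opaque Instruction type standing in for the real one:]

    // Null marks "the walk reached the function entry"; an all-ones pointer
    // marks "StartBB does not post-dominate everything visited, treat the
    // result as unsafe". Callers test for these before using any entry.
    #include <cassert>
    #include <set>

    struct Instruction; // opaque stand-in

    int main() {
      std::set<Instruction *> Depending;
      Depending.insert(nullptr);                             // reached entry
      Depending.insert(reinterpret_cast<Instruction *>(-1)); // unsafe marker
      assert(Depending.count(nullptr));
      assert(Depending.count(reinterpret_cast<Instruction *>(-1)));
      return 0;
    }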
+  const Value *Arg = GetObjCArg(RetainRV);
+  ImmutableCallSite CS(Arg);
+  if (const Instruction *Call = CS.getInstruction()) {
+    if (Call->getParent() == RetainRV->getParent()) {
+      BasicBlock::const_iterator I = Call;
+      ++I;
+      while (isNoopInstruction(I)) ++I;
+      if (&*I == RetainRV)
+        return false;
+    } else if (const InvokeInst *II = dyn_cast(Call)) {
+      BasicBlock *RetainRVParent = RetainRV->getParent();
+      if (II->getNormalDest() == RetainRVParent) {
+        BasicBlock::const_iterator I = RetainRVParent->begin();
+        while (isNoopInstruction(I)) ++I;
+        if (&*I == RetainRV)
+          return false;
+      }
+    }
+  }
+
+  // Check for being preceded by an objc_autoreleaseReturnValue on the same
+  // pointer. In this case, we can delete the pair.
+  BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+  if (I != Begin) {
+    do --I; while (I != Begin && isNoopInstruction(I));
+    if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+        GetObjCArg(I) == Arg) {
+      Changed = true;
+      ++NumPeeps;
+
+      DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
+                   << " Erasing " << *RetainRV
+                   << "\n");
+
+      EraseInstruction(I);
+      EraseInstruction(RetainRV);
+      return true;
+    }
+  }
+
+  // Turn it into a plain objc_retain.
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
+                  "objc_retainAutoreleasedReturnValue => "
+                  "objc_retain since the operand is not a return value.\n"
+                  " Old: "
+               << *RetainRV << "\n");
+
+  cast(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+
+  DEBUG(dbgs() << " New: "
+               << *RetainRV << "\n");
+
+  return false;
+}
+
+/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
+/// used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+                                      InstructionClass &Class) {
+  // Check for a return of the pointer value.
+  const Value *Ptr = GetObjCArg(AutoreleaseRV);
+  SmallVector Users;
+  Users.push_back(Ptr);
+  do {
+    Ptr = Users.pop_back_val();
+    for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+         UI != UE; ++UI) {
+      const User *I = *UI;
+      if (isa(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+        return;
+      if (isa(I))
+        Users.push_back(I);
+    }
+  } while (!Users.empty());
+
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
+                  "objc_autoreleaseReturnValue => "
+                  "objc_autorelease since its operand is not used as a return "
+                  "value.\n"
+                  " Old: "
+               << *AutoreleaseRV << "\n");
+
+  CallInst *AutoreleaseRVCI = cast(AutoreleaseRV);
+  AutoreleaseRVCI->
+    setCalledFunction(getAutoreleaseCallee(F.getParent()));
+  AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
+  Class = IC_Autorelease;
+
+  DEBUG(dbgs() << " New: "
+               << *AutoreleaseRV << "\n");
+
+}
+
+/// Visit each call, one at a time, and make simplifications without doing any
+/// additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+  // Reset all the flags in preparation for recomputing them.
+  UsedInThisFunction = 0;
+
+  // Visit all objc_* calls in F.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
+          << Class << "; " << *Inst << "\n");
+
+    switch (Class) {
+    default: break;
+
+    // Delete no-op casts.
These function calls have special semantics, but + // the semantics are entirely implemented via lowering in the front-end, + // so by the time they reach the optimizer, they are just no-op calls + // which return their argument. + // + // There are gray areas here, as the ability to cast reference-counted + // pointers to raw void* and back allows code to break ARC assumptions, + // however these are currently considered to be unimportant. + case IC_NoopCast: + Changed = true; + ++NumNoops; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:" + " " << *Inst << "\n"); + EraseInstruction(Inst); + continue; + + // If the pointer-to-weak-pointer is null, it's undefined behavior. + case IC_StoreWeak: + case IC_LoadWeak: + case IC_LoadWeakRetained: + case IC_InitWeak: + case IC_DestroyWeak: { + CallInst *CI = cast(Inst); + if (isNullOrUndef(CI->getArgOperand(0))) { + Changed = true; + Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + llvm::Value *NewValue = UndefValue::get(CI->getType()); + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " + "pointer-to-weak-pointer is undefined behavior.\n" + " Old = " << *CI << + "\n New = " << + *NewValue << "\n"); + CI->replaceAllUsesWith(NewValue); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_CopyWeak: + case IC_MoveWeak: { + CallInst *CI = cast(Inst); + if (isNullOrUndef(CI->getArgOperand(0)) || + isNullOrUndef(CI->getArgOperand(1))) { + Changed = true; + Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + + llvm::Value *NewValue = UndefValue::get(CI->getType()); + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " + "pointer-to-weak-pointer is undefined behavior.\n" + " Old = " << *CI << + "\n New = " << + *NewValue << "\n"); + + CI->replaceAllUsesWith(NewValue); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_Retain: + OptimizeRetainCall(F, Inst); + break; + case IC_RetainRV: + if (OptimizeRetainRVCall(F, Inst)) + continue; + break; + case IC_AutoreleaseRV: + OptimizeAutoreleaseRVCall(F, Inst, Class); + break; + } + + // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. + if (IsAutorelease(Class) && Inst->use_empty()) { + CallInst *Call = cast(Inst); + const Value *Arg = Call->getArgOperand(0); + Arg = FindSingleUseIdentifiedObject(Arg); + if (Arg) { + Changed = true; + ++NumAutoreleases; + + // Create the declaration lazily. + LLVMContext &C = Inst->getContext(); + CallInst *NewCall = + CallInst::Create(getReleaseCallee(F.getParent()), + Call->getArgOperand(0), "", Call); + NewCall->setMetadata(ImpreciseReleaseMDKind, + MDNode::get(C, ArrayRef())); + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " + "objc_autorelease(x) with objc_release(x) since x is " + "otherwise unused.\n" + " Old: " << *Call << + "\n New: " << + *NewCall << "\n"); + + EraseInstruction(Call); + Inst = NewCall; + Class = IC_Release; + } + } + + // For functions which can never be passed stack arguments, add + // a tail keyword. + if (IsAlwaysTail(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" + " to function since it can never be passed stack args: " << *Inst << + "\n"); + cast(Inst)->setTailCall(); + } + + // Ensure that functions that can never have a "tail" keyword due to the + // semantics of ARC truly do not do so. 
+ if (IsNeverTail(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " + "keyword from function: " << *Inst << + "\n"); + cast(Inst)->setTailCall(false); + } + + // Set nounwind as needed. + if (IsNoThrow(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" + " class. Setting nounwind on: " << *Inst << "\n"); + cast(Inst)->setDoesNotThrow(); + } + + if (!IsNoopOnNull(Class)) { + UsedInThisFunction |= 1 << Class; + continue; + } + + const Value *Arg = GetObjCArg(Inst); + + // ARC calls with null are no-ops. Delete them. + if (isNullOrUndef(Arg)) { + Changed = true; + ++NumNoops; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " + " null are no-ops. Erasing: " << *Inst << "\n"); + EraseInstruction(Inst); + continue; + } + + // Keep track of which of retain, release, autorelease, and retain_block + // are actually present in this function. + UsedInThisFunction |= 1 << Class; + + // If Arg is a PHI, and one or more incoming values to the + // PHI are null, and the call is control-equivalent to the PHI, and there + // are no relevant side effects between the PHI and the call, the call + // could be pushed up to just those paths with non-null incoming values. + // For now, don't bother splitting critical edges for this. + SmallVector, 4> Worklist; + Worklist.push_back(std::make_pair(Inst, Arg)); + do { + std::pair Pair = Worklist.pop_back_val(); + Inst = Pair.first; + Arg = Pair.second; + + const PHINode *PN = dyn_cast(Arg); + if (!PN) continue; + + // Determine if the PHI has any null operands, or any incoming + // critical edges. + bool HasNull = false; + bool HasCriticalEdges = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (isNullOrUndef(Incoming)) + HasNull = true; + else if (cast(PN->getIncomingBlock(i)->back()) + .getNumSuccessors() != 1) { + HasCriticalEdges = true; + break; + } + } + // If we have null operands and no critical edges, optimize. + if (!HasCriticalEdges && HasNull) { + SmallPtrSet DependingInstructions; + SmallPtrSet Visited; + + // Check that there is nothing that cares about the reference + // count between the call and the phi. + switch (Class) { + case IC_Retain: + case IC_RetainBlock: + // These can always be moved up. + break; + case IC_Release: + // These can't be moved across things that care about the retain + // count. + FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_Autorelease: + // These can't be moved across autorelease pool scope boundaries. + FindDependencies(AutoreleasePoolBoundary, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_RetainRV: + case IC_AutoreleaseRV: + // Don't move these; the RV optimization depends on the autoreleaseRV + // being tail called, and the retainRV being immediately after a call + // (which might still happen if we get lucky with codegen layout, but + // it's not worth taking the chance). + continue; + default: + llvm_unreachable("Invalid dependence flavor"); + } + + if (DependingInstructions.size() == 1 && + *DependingInstructions.begin() == PN) { + Changed = true; + ++NumPartialNoops; + // Clone the call into each predecessor that has a non-null value. 
+ CallInst *CInst = cast(Inst); + Type *ParamTy = CInst->getArgOperand(0)->getType(); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (!isNullOrUndef(Incoming)) { + CallInst *Clone = cast(CInst->clone()); + Value *Op = PN->getIncomingValue(i); + Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); + if (Op->getType() != ParamTy) + Op = new BitCastInst(Op, ParamTy, "", InsertPos); + Clone->setArgOperand(0, Op); + Clone->insertBefore(InsertPos); + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " + << *CInst << "\n" + " And inserting " + "clone at " << *InsertPos << "\n"); + Worklist.push_back(std::make_pair(Clone, Incoming)); + } + } + // Erase the original call. + DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); + EraseInstruction(CInst); + continue; + } + } + } while (!Worklist.empty()); + } + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); +} + +/// Check for critical edges, loop boundaries, irreducible control flow, or +/// other CFG structures where moving code across the edge would result in it +/// being executed more. +void +ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, + DenseMap &BBStates, + BBState &MyStates) const { + // If any top-down local-use or possible-dec has a succ which is earlier in + // the sequence, forget it. + for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) + switch (I->second.GetSeq()) { + default: break; + case S_Use: { + const Value *Arg = I->first; + const TerminatorInst *TI = cast(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + PtrState &S = I->second; + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + Sequence SuccSSeq = S_None; + bool SuccSRRIKnownSafe = false; + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + DenseMap::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + SuccSSeq = SuccS.GetSeq(); + SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + switch (SuccSSeq) { + case S_None: + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { + S.ClearSequenceProgress(); + break; + } + continue; + } + case S_Use: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. + if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + break; + } + case S_CanRelease: { + const Value *Arg = I->first; + const TerminatorInst *TI = cast(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + PtrState &S = I->second; + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + Sequence SuccSSeq = S_None; + bool SuccSRRIKnownSafe = false; + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. 
+        DenseMap::iterator BBI =
+          BBStates.find(*SI);
+        assert(BBI != BBStates.end());
+        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+        SuccSSeq = SuccS.GetSeq();
+        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+        switch (SuccSSeq) {
+        case S_None: {
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+            S.ClearSequenceProgress();
+            break;
+          }
+          continue;
+        }
+        case S_CanRelease:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+        case S_Use:
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+            AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      }
+      // If the state at the other end of any of the successor edges
+      // matches the current state, require all edges to match. This
+      // guards against loops in the middle of a sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        S.ClearSequenceProgress();
+      break;
+    }
+  }
+}
+
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+                                     BasicBlock *BB,
+                                     MapVector &Retains,
+                                     BBState &MyStates) {
+  bool NestingDetected = false;
+  InstructionClass Class = GetInstructionClass(Inst);
+  const Value *Arg = 0;
+
+  switch (Class) {
+  case IC_Release: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+    // If we see two releases in a row on the same pointer, make a note;
+    // we'll circle back to revisit it after we've hopefully eliminated the
+    // second release, which may allow us to eliminate the first release too.
+    // Theoretically we could implement removal of nested retain+release
+    // pairs by making PtrState hold a stack of states, but this is
+    // simple and avoids adding overhead for the non-nested case.
+    if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
+      DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
+                      "releases (i.e. a release pair)\n");
+      NestingDetected = true;
+    }
+
+    MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+    S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+    S.RRI.ReleaseMetadata = ReleaseMetadata;
+    S.RRI.KnownSafe = S.IsKnownIncremented();
+    S.RRI.IsTailCallRelease = cast(Inst)->isTailCall();
+    S.RRI.Calls.insert(Inst);
+
+    S.SetKnownPositiveRefCount();
+    break;
+  }
+  case IC_RetainBlock:
+    // An objc_retainBlock call with just a use may need to be kept,
+    // because it may be copying a block from the stack to the heap.
+    if (!IsRetainBlockOptimizable(Inst))
+      break;
+    // FALLTHROUGH
+  case IC_Retain:
+  case IC_RetainRV: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrBottomUpState(Arg);
+    S.SetKnownPositiveRefCount();
+
+    switch (S.GetSeq()) {
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+    case S_Use:
+      S.RRI.ReverseInsertPts.clear();
+      // FALL THROUGH
+    case S_CanRelease:
+      // Don't do retain+release tracking for IC_RetainRV, because it's
+      // better to let it remain as the first instruction after a call.
+      if (Class != IC_RetainRV) {
+        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+        Retains[Inst] = S.RRI;
+      }
+      S.ClearSequenceProgress();
+      break;
+    case S_None:
+      break;
+    case S_Retain:
+      llvm_unreachable("bottom-up pointer in retain state!");
+    }
+    return NestingDetected;
+  }
+  case IC_AutoreleasepoolPop:
+    // Conservatively, clear MyStates for all known pointers.
+    MyStates.clearBottomUpPointers();
+    return NestingDetected;
+  case IC_AutoreleasepoolPush:
+  case IC_None:
+    // These are irrelevant.
+    return NestingDetected;
+  default:
+    break;
+  }
+
+  // Consider any other possible effects of this instruction on each
+  // pointer being tracked.
+  for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+       ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+    const Value *Ptr = MI->first;
+    if (Ptr == Arg)
+      continue; // Handled above.
+    PtrState &S = MI->second;
+    Sequence Seq = S.GetSeq();
+
+    // Check for possible releases.
+    if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      S.ClearRefCount();
+      switch (Seq) {
+      case S_Use:
+        S.SetSeq(S_CanRelease);
+        continue;
+      case S_CanRelease:
+      case S_Release:
+      case S_MovableRelease:
+      case S_Stop:
+      case S_None:
+        break;
+      case S_Retain:
+        llvm_unreachable("bottom-up pointer in retain state!");
+      }
+    }
+
+    // Check for possible direct uses.
+    switch (Seq) {
+    case S_Release:
+    case S_MovableRelease:
+      if (CanUse(Inst, Ptr, PA, Class)) {
+        assert(S.RRI.ReverseInsertPts.empty());
+        // If this is an invoke instruction, we're scanning it as part of
+        // one of its successor blocks, since we can't insert code after it
+        // in its own block, and we don't want to split critical edges.
+        if (isa(Inst))
+          S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+        else
+          S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+        S.SetSeq(S_Use);
+      } else if (Seq == S_Release &&
+                 (Class == IC_User || Class == IC_CallOrUser)) {
+        // Non-movable releases depend on any possible objc pointer use.
+        S.SetSeq(S_Stop);
+        assert(S.RRI.ReverseInsertPts.empty());
+        // As above; handle invoke specially.
+        if (isa(Inst))
+          S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+        else
+          S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+      }
+      break;
+    case S_Stop:
+      if (CanUse(Inst, Ptr, PA, Class))
+        S.SetSeq(S_Use);
+      break;
+    case S_CanRelease:
+    case S_Use:
+    case S_None:
+      break;
+    case S_Retain:
+      llvm_unreachable("bottom-up pointer in retain state!");
+    }
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+                          DenseMap &BBStates,
+                          MapVector &Retains) {
+  bool NestingDetected = false;
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each successor to compute the initial state
+  // for the current block.
+  BBState::edge_iterator SI(MyStates.succ_begin()),
+                         SE(MyStates.succ_end());
+  if (SI != SE) {
+    const BasicBlock *Succ = *SI;
+    DenseMap::iterator I = BBStates.find(Succ);
+    assert(I != BBStates.end());
+    MyStates.InitFromSucc(I->second);
+    ++SI;
+    for (; SI != SE; ++SI) {
+      Succ = *SI;
+      I = BBStates.find(Succ);
+      assert(I != BBStates.end());
+      MyStates.MergeSucc(I->second);
+    }
+  }
+
+  // Visit all the instructions, bottom-up.
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+    Instruction *Inst = llvm::prior(I);
+
+    // Invoke instructions are visited as part of their successors (below).
+    if (isa(Inst))
+      continue;
+
+    DEBUG(dbgs() << "ObjCARCOpt::VisitBottomUp: Visiting " << *Inst << "\n");
+
+    NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+  }
+
+  // If there's a predecessor with an invoke, visit the invoke as if it were
+  // part of this block, since we can't insert code after an invoke in its own
+  // block, and we don't want to split critical edges.
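[Illustrative aside, not part of the patch: a hedged recap of the bottom-up progression implemented above. The Sequence enum itself is defined earlier in this file; the simplified event model and transitions below are inferred from the handlers in VisitInstructionBottomUp and are not the pass's real types.]

    // Toy re-statement of the bottom-up transitions; heavily simplified.
    #include <cassert>

    enum Seq { None, Release, MovableRelease, Stop, Use, CanRelease };
    enum Event { SawRelease, SawNonMovableUserCall, SawUse, SawMayDecrement,
                 SawRetain };

    // The real pass also records insertion points, release metadata, and
    // KnownSafe flags at several of these steps.
    static Seq Step(Seq S, Event E) {
      switch (E) {
      case SawRelease:            return Release; // MovableRelease if tagged
      case SawNonMovableUserCall: return S == Release ? Stop : S;
      case SawUse:                return (S == Release || S == MovableRelease ||
                                          S == Stop) ? Use : S;
      case SawMayDecrement:       return S == Use ? CanRelease : S;
      case SawRetain:             return None;    // candidate pair recorded
      }
      return S;
    }

    int main() {
      Seq S = None;
      S = Step(S, SawRelease);      // objc_release, walking bottom-up
      S = Step(S, SawUse);          // a use that needs a positive count
      S = Step(S, SawMayDecrement); // a call that may decrement the count
      assert(S == CanRelease);
      S = Step(S, SawRetain);       // the matching objc_retain closes the pair
      assert(S == None);
      return 0;
    }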
+  for (BBState::edge_iterator PI(MyStates.pred_begin()),
+       PE(MyStates.pred_end()); PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    if (InvokeInst *II = dyn_cast(&Pred->back()))
+      NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+                                    DenseMap &Releases,
+                                    BBState &MyStates) {
+  bool NestingDetected = false;
+  InstructionClass Class = GetInstructionClass(Inst);
+  const Value *Arg = 0;
+
+  switch (Class) {
+  case IC_RetainBlock:
+    // An objc_retainBlock call with just a use may need to be kept,
+    // because it may be copying a block from the stack to the heap.
+    if (!IsRetainBlockOptimizable(Inst))
+      break;
+    // FALLTHROUGH
+  case IC_Retain:
+  case IC_RetainRV: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+    // Don't do retain+release tracking for IC_RetainRV, because it's
+    // better to let it remain as the first instruction after a call.
+    if (Class != IC_RetainRV) {
+      // If we see two retains in a row on the same pointer, make a note;
+      // we'll circle back to revisit it after we've hopefully eliminated
+      // the second retain, which may allow us to eliminate the first
+      // retain too.
+      // Theoretically we could implement removal of nested retain+release
+      // pairs by making PtrState hold a stack of states, but this is
+      // simple and avoids adding overhead for the non-nested case.
+      if (S.GetSeq() == S_Retain)
+        NestingDetected = true;
+
+      S.ResetSequenceProgress(S_Retain);
+      S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+      S.RRI.KnownSafe = S.IsKnownIncremented();
+      S.RRI.Calls.insert(Inst);
+    }
+
+    S.SetKnownPositiveRefCount();
+
+    // A retain can be a potential use; proceed to the generic checking
+    // code below.
+    break;
+  }
+  case IC_Release: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrTopDownState(Arg);
+    S.ClearRefCount();
+
+    switch (S.GetSeq()) {
+    case S_Retain:
+    case S_CanRelease:
+      S.RRI.ReverseInsertPts.clear();
+      // FALL THROUGH
+    case S_Use:
+      S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+      S.RRI.IsTailCallRelease = cast(Inst)->isTailCall();
+      Releases[Inst] = S.RRI;
+      S.ClearSequenceProgress();
+      break;
+    case S_None:
+      break;
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+      llvm_unreachable("top-down pointer in release state!");
+    }
+    break;
+  }
+  case IC_AutoreleasepoolPop:
+    // Conservatively, clear MyStates for all known pointers.
+    MyStates.clearTopDownPointers();
+    return NestingDetected;
+  case IC_AutoreleasepoolPush:
+  case IC_None:
+    // These are irrelevant.
+    return NestingDetected;
+  default:
+    break;
+  }
+
+  // Consider any other possible effects of this instruction on each
+  // pointer being tracked.
+  for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+       ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+    const Value *Ptr = MI->first;
+    if (Ptr == Arg)
+      continue; // Handled above.
+    PtrState &S = MI->second;
+    Sequence Seq = S.GetSeq();
+
+    // Check for possible releases.
+    if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      S.ClearRefCount();
+      switch (Seq) {
+      case S_Retain:
+        S.SetSeq(S_CanRelease);
+        assert(S.RRI.ReverseInsertPts.empty());
+        S.RRI.ReverseInsertPts.insert(Inst);
+
+        // One call can't cause a transition from S_Retain to S_CanRelease
+        // and S_CanRelease to S_Use. If we've made the first transition,
+        // we're done.
+        continue;
+      case S_Use:
+      case S_CanRelease:
+      case S_None:
+        break;
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+        llvm_unreachable("top-down pointer in release state!");
+      }
+    }
+
+    // Check for possible direct uses.
+    switch (Seq) {
+    case S_CanRelease:
+      if (CanUse(Inst, Ptr, PA, Class))
+        S.SetSeq(S_Use);
+      break;
+    case S_Retain:
+    case S_Use:
+    case S_None:
+      break;
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+      llvm_unreachable("top-down pointer in release state!");
+    }
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+                         DenseMap &BBStates,
+                         DenseMap &Releases) {
+  bool NestingDetected = false;
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each predecessor to compute the initial state
+  // for the current block.
+  BBState::edge_iterator PI(MyStates.pred_begin()),
+                         PE(MyStates.pred_end());
+  if (PI != PE) {
+    const BasicBlock *Pred = *PI;
+    DenseMap::iterator I = BBStates.find(Pred);
+    assert(I != BBStates.end());
+    MyStates.InitFromPred(I->second);
+    ++PI;
+    for (; PI != PE; ++PI) {
+      Pred = *PI;
+      I = BBStates.find(Pred);
+      assert(I != BBStates.end());
+      MyStates.MergePred(I->second);
+    }
+  }
+
+  // Visit all the instructions, top-down.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+    Instruction *Inst = I;
+
+    DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+
+    NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+  }
+
+  CheckForCFGHazards(BB, BBStates, MyStates);
+  return NestingDetected;
+}
+
+static void
+ComputePostOrders(Function &F,
+                  SmallVectorImpl &PostOrder,
+                  SmallVectorImpl &ReverseCFGPostOrder,
+                  unsigned NoObjCARCExceptionsMDKind,
+                  DenseMap &BBStates) {
+  /// The visited set, for doing DFS walks.
+  SmallPtrSet Visited;
+
+  // Do DFS, computing the PostOrder.
+  SmallPtrSet OnStack;
+  SmallVector, 16> SuccStack;
+
+  // Functions always have exactly one entry block, and we don't have
+  // any other block that we treat like an entry block.
+  BasicBlock *EntryBB = &F.getEntryBlock();
+  BBState &MyStates = BBStates[EntryBB];
+  MyStates.SetAsEntry();
+  TerminatorInst *EntryTI = cast(&EntryBB->back());
+  SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
+  Visited.insert(EntryBB);
+  OnStack.insert(EntryBB);
+  do {
+  dfs_next_succ:
+    BasicBlock *CurrBB = SuccStack.back().first;
+    TerminatorInst *TI = cast(&CurrBB->back());
+    succ_iterator SE(TI, false);
+
+    while (SuccStack.back().second != SE) {
+      BasicBlock *SuccBB = *SuccStack.back().second++;
+      if (Visited.insert(SuccBB)) {
+        TerminatorInst *TI = cast(&SuccBB->back());
+        SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+        BBStates[CurrBB].addSucc(SuccBB);
+        BBState &SuccStates = BBStates[SuccBB];
+        SuccStates.addPred(CurrBB);
+        OnStack.insert(SuccBB);
+        goto dfs_next_succ;
+      }
+
+      if (!OnStack.count(SuccBB)) {
+        BBStates[CurrBB].addSucc(SuccBB);
+        BBStates[SuccBB].addPred(CurrBB);
+      }
+    }
+    OnStack.erase(CurrBB);
+    PostOrder.push_back(CurrBB);
+    SuccStack.pop_back();
+  } while (!SuccStack.empty());
+
+  Visited.clear();
+
+  // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+  // Functions may have many exits, and there are also blocks which we treat
+  // as exits due to ignored edges.
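[Illustrative aside, not part of the patch; ComputePostOrders continues with the reverse-CFG walk in the next hunk. For contrast with the iterative, goto-based DFS above, a recursive sketch of the same forward post-order computation on a toy adjacency-list CFG; all names here are illustrative only.]

    #include <cstddef>
    #include <set>
    #include <vector>

    typedef std::vector<std::vector<int> > Graph; // node -> successor indices

    // A node is appended only after all of its successors have been emitted,
    // yielding a post-order; the real code additionally records Pred/Succ
    // edges and tracks an on-stack set so backedges can be treated specially.
    static void PostOrderDFS(const Graph &G, int BB, std::set<int> &Visited,
                             std::vector<int> &PostOrder) {
      if (!Visited.insert(BB).second)
        return;
      for (std::size_t i = 0; i < G[BB].size(); ++i)
        PostOrderDFS(G, G[BB][i], Visited, PostOrder);
      PostOrder.push_back(BB);
    }

    int main() {
      Graph G(4);
      G[0].push_back(1); G[0].push_back(2); // diamond: 0 -> {1, 2} -> 3
      G[1].push_back(3); G[2].push_back(3);
      std::set<int> Visited;
      std::vector<int> PostOrder;
      PostOrderDFS(G, /*entry=*/0, Visited, PostOrder);
      // Iterating PostOrder in reverse gives the reverse post-order that
      // VisitTopDown consumes.
      return PostOrder.back() == 0 ? 0 : 1;
    }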
+  SmallVector, 16> PredStack;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    BasicBlock *ExitBB = I;
+    BBState &MyStates = BBStates[ExitBB];
+    if (!MyStates.isExit())
+      continue;
+
+    MyStates.SetAsExit();
+
+    PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
+    Visited.insert(ExitBB);
+    while (!PredStack.empty()) {
+    reverse_dfs_next_succ:
+      BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+      while (PredStack.back().second != PE) {
+        BasicBlock *BB = *PredStack.back().second++;
+        if (Visited.insert(BB)) {
+          PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
+          goto reverse_dfs_next_succ;
+        }
+      }
+      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+    }
+  }
+}
+
+// Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+                  DenseMap &BBStates,
+                  MapVector &Retains,
+                  DenseMap &Releases) {
+
+  // Use reverse-postorder traversals, because we magically know that loops
+  // will be well behaved, i.e. they won't repeatedly call retain on a single
+  // pointer without doing a release. We can't use the ReversePostOrderTraversal
+  // class here because we want the reverse-CFG postorder to consider each
+  // function exit point, and we want to ignore selected cycle edges.
+  SmallVector PostOrder;
+  SmallVector ReverseCFGPostOrder;
+  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+                    NoObjCARCExceptionsMDKind,
+                    BBStates);
+
+  // Use reverse-postorder on the reverse CFG for bottom-up.
+  bool BottomUpNestingDetected = false;
+  for (SmallVectorImpl::const_reverse_iterator I =
+         ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+       I != E; ++I)
+    BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
+
+  // Use reverse-postorder for top-down.
+  bool TopDownNestingDetected = false;
+  for (SmallVectorImpl::const_reverse_iterator I =
+         PostOrder.rbegin(), E = PostOrder.rend();
+       I != E; ++I)
+    TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
+
+  return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+                           RRInfo &RetainsToMove,
+                           RRInfo &ReleasesToMove,
+                           MapVector &Retains,
+                           DenseMap &Releases,
+                           SmallVectorImpl &DeadInsts,
+                           Module *M) {
+  Type *ArgTy = Arg->getType();
+  Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+
+  // Insert the new retain and release calls.
+  for (SmallPtrSet::const_iterator
+       PI = ReleasesToMove.ReverseInsertPts.begin(),
+       PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call =
+      CallInst::Create(RetainsToMove.IsRetainBlock ?
+                         getRetainBlockCallee(M) : getRetainCallee(M),
+                       MyArg, "", InsertPt);
+    Call->setDoesNotThrow();
+    if (RetainsToMove.IsRetainBlock)
+      Call->setMetadata(CopyOnEscapeMDKind,
+                        MDNode::get(M->getContext(), ArrayRef()));
+    else
+      Call->setTailCall();
+
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
+                 << "\n"
+                    " At insertion point: " << *InsertPt
+                 << "\n");
+  }
+  for (SmallPtrSet::const_iterator
+       PI = RetainsToMove.ReverseInsertPts.begin(),
+       PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? 
Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+                                      "", InsertPt);
+    // Attach a clang.imprecise_release metadata tag, if appropriate.
+    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+      Call->setMetadata(ImpreciseReleaseMDKind, M);
+    Call->setDoesNotThrow();
+    if (ReleasesToMove.IsTailCallRelease)
+      Call->setTailCall();
+
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
+                 << "\n"
+                    " At insertion point: " << *InsertPt
+                 << "\n");
+  }
+
+  // Delete the original retain and release calls.
+  for (SmallPtrSet::const_iterator
+       AI = RetainsToMove.Calls.begin(),
+       AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRetain = *AI;
+    Retains.blot(OrigRetain);
+    DeadInsts.push_back(OrigRetain);
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
+                    "\n");
+  }
+  for (SmallPtrSet::const_iterator
+       AI = ReleasesToMove.Calls.begin(),
+       AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRelease = *AI;
+    Releases.erase(OrigRelease);
+    DeadInsts.push_back(OrigRelease);
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
+                 << "\n");
+  }
+}
+
+bool
+ObjCARCOpt::ConnectTDBUTraversals(DenseMap
+                                    &BBStates,
+                                  MapVector &Retains,
+                                  DenseMap &Releases,
+                                  Module *M,
+                                  SmallVector &NewRetains,
+                                  SmallVector &NewReleases,
+                                  SmallVector &DeadInsts,
+                                  RRInfo &RetainsToMove,
+                                  RRInfo &ReleasesToMove,
+                                  Value *Arg,
+                                  bool KnownSafe,
+                                  bool &AnyPairsCompletelyEliminated) {
+  // If a pair happens in a region where it is known that the reference count
+  // is already incremented, we can similarly ignore possible decrements.
+  bool KnownSafeTD = true, KnownSafeBU = true;
+
+  // Connect the dots between the top-down-collected RetainsToMove and
+  // bottom-up-collected ReleasesToMove to form sets of related calls.
+  // This is an iterative process so that we connect multiple releases
+  // to multiple retains if needed.
+  unsigned OldDelta = 0;
+  unsigned NewDelta = 0;
+  unsigned OldCount = 0;
+  unsigned NewCount = 0;
+  bool FirstRelease = true;
+  bool FirstRetain = true;
+  for (;;) {
+    for (SmallVectorImpl::const_iterator
+         NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+      Instruction *NewRetain = *NI;
+      MapVector::const_iterator It = Retains.find(NewRetain);
+      assert(It != Retains.end());
+      const RRInfo &NewRetainRRI = It->second;
+      KnownSafeTD &= NewRetainRRI.KnownSafe;
+      for (SmallPtrSet::const_iterator
+           LI = NewRetainRRI.Calls.begin(),
+           LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+        Instruction *NewRetainRelease = *LI;
+        DenseMap::const_iterator Jt =
+          Releases.find(NewRetainRelease);
+        if (Jt == Releases.end())
+          return false;
+        const RRInfo &NewRetainReleaseRRI = Jt->second;
+        assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+        if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+          OldDelta -=
+            BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+          // Merge the ReleaseMetadata and IsTailCallRelease values.
+          if (FirstRelease) {
+            ReleasesToMove.ReleaseMetadata =
+              NewRetainReleaseRRI.ReleaseMetadata;
+            ReleasesToMove.IsTailCallRelease =
+              NewRetainReleaseRRI.IsTailCallRelease;
+            FirstRelease = false;
+          } else {
+            if (ReleasesToMove.ReleaseMetadata !=
+                NewRetainReleaseRRI.ReleaseMetadata)
+              ReleasesToMove.ReleaseMetadata = 0;
+            if (ReleasesToMove.IsTailCallRelease !=
+                NewRetainReleaseRRI.IsTailCallRelease)
+              ReleasesToMove.IsTailCallRelease = false;
+          }
+
+          // Collect the optimal insertion points.
+          if (!KnownSafe)
+            for (SmallPtrSet::const_iterator
+                 RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+                 RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+                 RI != RE; ++RI) {
+              Instruction *RIP = *RI;
+              if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+                NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+            }
+          NewReleases.push_back(NewRetainRelease);
+        }
+      }
+    }
+    NewRetains.clear();
+    if (NewReleases.empty()) break;
+
+    // Back the other way.
+    for (SmallVectorImpl::const_iterator
+         NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+      Instruction *NewRelease = *NI;
+      DenseMap::const_iterator It =
+        Releases.find(NewRelease);
+      assert(It != Releases.end());
+      const RRInfo &NewReleaseRRI = It->second;
+      KnownSafeBU &= NewReleaseRRI.KnownSafe;
+      for (SmallPtrSet::const_iterator
+           LI = NewReleaseRRI.Calls.begin(),
+           LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+        Instruction *NewReleaseRetain = *LI;
+        MapVector::const_iterator Jt =
+          Retains.find(NewReleaseRetain);
+        if (Jt == Retains.end())
+          return false;
+        const RRInfo &NewReleaseRetainRRI = Jt->second;
+        assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+        if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+          unsigned PathCount =
+            BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+          OldDelta += PathCount;
+          OldCount += PathCount;
+
+          // Merge the IsRetainBlock values.
+          if (FirstRetain) {
+            RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+            FirstRetain = false;
+          } else if (RetainsToMove.IsRetainBlock !=
+                     NewReleaseRetainRRI.IsRetainBlock)
+            // It's not possible to merge the sequences if one uses
+            // objc_retain and the other uses objc_retainBlock.
+            return false;
+
+          // Collect the optimal insertion points.
+          if (!KnownSafe)
+            for (SmallPtrSet::const_iterator
+                 RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+                 RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+                 RI != RE; ++RI) {
+              Instruction *RIP = *RI;
+              if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+                PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+                NewDelta += PathCount;
+                NewCount += PathCount;
+              }
+            }
+          NewRetains.push_back(NewReleaseRetain);
+        }
+      }
+    }
+    NewReleases.clear();
+    if (NewRetains.empty()) break;
+  }
+
+  // If the pointer is known incremented or nested, we can safely delete the
+  // pair regardless of what's between them.
+  if (KnownSafeTD || KnownSafeBU) {
+    RetainsToMove.ReverseInsertPts.clear();
+    ReleasesToMove.ReverseInsertPts.clear();
+    NewCount = 0;
+  } else {
+    // Determine whether the new insertion points we computed preserve the
+    // balance of retain and release calls through the program.
+    // TODO: If the fully aggressive solution isn't valid, try to find a
+    // less aggressive solution which is.
+    if (NewDelta != 0)
+      return false;
+  }
+
+  // Determine whether the original call points are balanced in the retain and
+  // release calls through the program. If not, conservatively don't touch
+  // them.
+  // TODO: It's theoretically possible to do code motion in this case, as
+  // long as the existing imbalances are maintained.
+  if (OldDelta != 0)
+    return false;
+
+  Changed = true;
+  assert(OldCount != 0 && "Unreachable code?");
+  NumRRs += OldCount - NewCount;
+  // Set to true if we completely removed any RR pairs.
+  AnyPairsCompletelyEliminated = NewCount == 0;
+
+  // We can move calls!
+  return true;
+}
+
+/// Identify pairings between the retains and releases, and delete and/or move
+/// them.
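[Illustrative aside, not part of the patch: a worked instance of the OldDelta balance test above, with path counts picked by hand and the unsigned counters simplified to plain ints. A retain whose block lies on both entry-to-exit paths contributes +2; a release covering the same two paths contributes -2 and the pair balances, while a release reachable along only one path leaves a nonzero delta and the function conservatively bails out.]

    #include <cassert>

    int main() {
      int OldDelta = 0;
      OldDelta += 2; // retain: GetAllPathCount() of its block is 2
      OldDelta -= 2; // matching release covers the same two paths
      assert(OldDelta == 0); // balanced -> the pair may be transformed

      int Unbalanced = 0;
      Unbalanced += 2; // retain on both paths
      Unbalanced -= 1; // release reachable along only one path
      assert(Unbalanced != 0); // rejected, as OldDelta != 0 above
      return 0;
    }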
+bool +ObjCARCOpt::PerformCodePlacement(DenseMap + &BBStates, + MapVector &Retains, + DenseMap &Releases, + Module *M) { + bool AnyPairsCompletelyEliminated = false; + RRInfo RetainsToMove; + RRInfo ReleasesToMove; + SmallVector NewRetains; + SmallVector NewReleases; + SmallVector DeadInsts; + + // Visit each retain. + for (MapVector::const_iterator I = Retains.begin(), + E = Retains.end(); I != E; ++I) { + Value *V = I->first; + if (!V) continue; // blotted + + Instruction *Retain = cast(V); + + DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain + << "\n"); + + Value *Arg = GetObjCArg(Retain); + + // If the object being released is in static or stack storage, we know it's + // not being managed by ObjC reference counting, so we can delete pairs + // regardless of what possible decrements or uses lie between them. + bool KnownSafe = isa(Arg) || isa(Arg); + + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (const LoadInst *LI = dyn_cast(Arg)) + if (const GlobalVariable *GV = + dyn_cast( + StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) + if (GV->isConstant()) + KnownSafe = true; + + // Connect the dots between the top-down-collected RetainsToMove and + // bottom-up-collected ReleasesToMove to form sets of related calls. + NewRetains.push_back(Retain); + bool PerformMoveCalls = + ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains, + NewReleases, DeadInsts, RetainsToMove, + ReleasesToMove, Arg, KnownSafe, + AnyPairsCompletelyEliminated); + + if (PerformMoveCalls) { + // Ok, everything checks out and we're all set. Let's move/delete some + // code! + MoveCalls(Arg, RetainsToMove, ReleasesToMove, + Retains, Releases, DeadInsts, M); + } + + // Clean up state for next retain. + NewReleases.clear(); + NewRetains.clear(); + RetainsToMove.clear(); + ReleasesToMove.clear(); + } + + // Now that we're done moving everything, we can delete the newly dead + // instructions, as we no longer need them as insert points. + while (!DeadInsts.empty()) + EraseInstruction(DeadInsts.pop_back_val()); + + return AnyPairsCompletelyEliminated; +} + +/// Weak pointer optimizations. +void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + // First, do memdep-style RLE and S2L optimizations. We can't use memdep + // itself because it uses AliasAnalysis and we need to do provenance + // queries instead. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << + "\n"); + + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) + continue; + + // Delete objc_loadWeak calls with no users. + if (Class == IC_LoadWeak && Inst->use_empty()) { + Inst->eraseFromParent(); + continue; + } + + // TODO: For now, just look for an earlier available version of this value + // within the same block. Theoretically, we could do memdep-style non-local + // analysis too, but that would want caching. A better approach would be to + // use the technique that EarlyCSE uses. 
+    inst_iterator Current = llvm::prior(I);
+    BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+    for (BasicBlock::iterator B = CurrentBB->begin(),
+                              J = Current.getInstructionIterator();
+         J != B; --J) {
+      Instruction *EarlierInst = &*llvm::prior(J);
+      InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+      switch (EarlierClass) {
+      case IC_LoadWeak:
+      case IC_LoadWeakRetained: {
+        // If this is loading from the same pointer, replace this load's value
+        // with that one.
+        CallInst *Call = cast(Inst);
+        CallInst *EarlierCall = cast(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall);
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_StoreWeak:
+      case IC_InitWeak: {
+        // If this is storing to the same pointer and has the same size etc.
+        // replace this load's value with the stored value.
+        CallInst *Call = cast(Inst);
+        CallInst *EarlierCall = cast(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_MoveWeak:
+      case IC_CopyWeak:
+        // TODO: Grab the copied value.
+        goto clobbered;
+      case IC_AutoreleasepoolPush:
+      case IC_None:
+      case IC_User:
+        // Weak pointers are only modified through the weak entry points
+        // (and arbitrary calls, which could call the weak entry points).
+        break;
+      default:
+        // Anything else could modify the weak pointer.
+        goto clobbered;
+      }
+    }
+    clobbered:;
+  }
+
+  // Then, for each destroyWeak with an alloca operand, check to see if
+  // the alloca and all its users can be zapped.
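[Annotation] The backward scan above uses nested switches and goto labels to render a three-way verdict on each earlier instruction. The same control shape, restructured as a sketch with an enum verdict instead of labels (names are illustrative, not the pass's):

    // Verdict on one earlier instruction during the backward scan; stands
    // in for the switch over EarlierClass above.
    enum ScanVerdict { SV_Forwarded, SV_Clobbered, SV_Continue };

    // Walk earlier instructions from nearest to furthest, stopping at the
    // first one that either supplies the value or might clobber it.
    template <class IterT, class ClassifyFn>
    static bool ForwardFromEarlier(IterT I, IterT End, ClassifyFn Classify) {
      for (; I != End; ++I) {
        switch (Classify(*I)) {
        case SV_Forwarded: return true;   // value replaced; done
        case SV_Clobbered: return false;  // possible modification; give up
        case SV_Continue:  break;         // harmless instruction; keep going
        }
      }
      return false; // reached the block head without finding anything
    }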
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_DestroyWeak) + continue; + + CallInst *Call = cast(Inst); + Value *Arg = Call->getArgOperand(0); + if (AllocaInst *Alloca = dyn_cast(Arg)) { + for (Value::use_iterator UI = Alloca->use_begin(), + UE = Alloca->use_end(); UI != UE; ++UI) { + const Instruction *UserInst = cast(*UI); + switch (GetBasicInstructionClass(UserInst)) { + case IC_InitWeak: + case IC_StoreWeak: + case IC_DestroyWeak: + continue; + default: + goto done; + } + } + Changed = true; + for (Value::use_iterator UI = Alloca->use_begin(), + UE = Alloca->use_end(); UI != UE; ) { + CallInst *UserInst = cast(*UI++); + switch (GetBasicInstructionClass(UserInst)) { + case IC_InitWeak: + case IC_StoreWeak: + // These functions return their second argument. + UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); + break; + case IC_DestroyWeak: + // No return value. + break; + default: + llvm_unreachable("alloca really is used!"); + } + UserInst->eraseFromParent(); + } + Alloca->eraseFromParent(); + done:; + } + } + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n"); + +} + +/// Identify program paths which execute sequences of retains and releases which +/// can be eliminated. +bool ObjCARCOpt::OptimizeSequences(Function &F) { + /// Releases, Retains - These are used to store the results of the main flow + /// analysis. These use Value* as the key instead of Instruction* so that the + /// map stays valid when we get around to rewriting code and calls get + /// replaced by arguments. + DenseMap Releases; + MapVector Retains; + + /// This is used during the traversal of the function to track the + /// states for each identified object at each block. + DenseMap BBStates; + + // Analyze the CFG of the function, and all instructions. + bool NestingDetected = Visit(F, BBStates, Retains, Releases); + + // Transform. + return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) && + NestingDetected; +} + +/// Look for this pattern: +/// \code +/// %call = call i8* @something(...) +/// %2 = call i8* @objc_retain(i8* %call) +/// %3 = call i8* @objc_autorelease(i8* %2) +/// ret i8* %3 +/// \endcode +/// And delete the retain and autorelease. +/// +/// Otherwise if it's just this: +/// \code +/// %3 = call i8* @objc_autorelease(i8* %2) +/// ret i8* %3 +/// \endcode +/// convert the autorelease to autoreleaseRV. 
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+  if (!F.getReturnType()->isPointerTy())
+    return;
+
+  SmallPtrSet DependingInstructions;
+  SmallPtrSet Visited;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock *BB = FI;
+    ReturnInst *Ret = dyn_cast(&BB->back());
+    if (!Ret) continue;
+
+    DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+
+    const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+    FindDependencies(NeedsPositiveRetainCount, Arg,
+                     BB, Ret, DependingInstructions, Visited, PA);
+    if (DependingInstructions.size() != 1)
+      goto next_block;
+
+    {
+      CallInst *Autorelease =
+        dyn_cast_or_null(*DependingInstructions.begin());
+      if (!Autorelease)
+        goto next_block;
+      InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+      if (!IsAutorelease(AutoreleaseClass))
+        goto next_block;
+      if (GetObjCArg(Autorelease) != Arg)
+        goto next_block;
+
+      DependingInstructions.clear();
+      Visited.clear();
+
+      // Check that there is nothing that can affect the reference
+      // count between the autorelease and the retain.
+      FindDependencies(CanChangeRetainCount, Arg,
+                       BB, Autorelease, DependingInstructions, Visited, PA);
+      if (DependingInstructions.size() != 1)
+        goto next_block;
+
+      {
+        CallInst *Retain =
+          dyn_cast_or_null(*DependingInstructions.begin());
+
+        // Check that we found a retain with the same argument.
+        if (!Retain ||
+            !IsRetain(GetBasicInstructionClass(Retain)) ||
+            GetObjCArg(Retain) != Arg)
+          goto next_block;
+
+        DependingInstructions.clear();
+        Visited.clear();
+
+        // Convert the autorelease to an autoreleaseRV, since it's
+        // returning the value.
+        if (AutoreleaseClass == IC_Autorelease) {
+          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease "
+                          "=> autoreleaseRV since it's returning a value.\n"
+                          "                             In: " << *Autorelease
+                       << "\n");
+          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+          DEBUG(dbgs() << "                             Out: " << *Autorelease
+                       << "\n");
+          Autorelease->setTailCall(); // Always tail call autoreleaseRV.
+          AutoreleaseClass = IC_AutoreleaseRV;
+        }
+
+        // Check that there is nothing that can affect the reference
+        // count between the retain and the call.
+        // Note that Retain need not be in BB.
+        FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() != 1)
+          goto next_block;
+
+        {
+          CallInst *Call =
+            dyn_cast_or_null(*DependingInstructions.begin());
+
+          // Check that the pointer is the return value of the call.
+          if (!Call || Arg != Call)
+            goto next_block;
+
+          // Check that the call is a regular call.
+          InstructionClass Class = GetBasicInstructionClass(Call);
+          if (Class != IC_CallOrUser && Class != IC_Call)
+            goto next_block;
+
+          // If so, we can zap the retain and autorelease.
+          Changed = true;
+          ++NumRets;
+          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
+                       << "\n                             Erasing: "
+                       << *Autorelease << "\n");
+          EraseInstruction(Retain);
+          EraseInstruction(Autorelease);
+        }
+      }
+    }
+
+  next_block:
+    DependingInstructions.clear();
+    Visited.clear();
+  }
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
+  // Identify the imprecise release metadata kind.
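[Annotation] Metadata kind names such as clang.imprecise_release are interned per LLVMContext into small integer IDs, which is what the lookups below do. A standalone sketch of the same register-and-attach shape, using the 3.2-era Value-based metadata API this patch itself targets; the tag name here is hypothetical:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    // Intern a custom metadata kind once per context, then tag a call with
    // an empty MDNode, the same shape the pass uses for its release tag.
    static void TagCall(llvm::Module &M, llvm::CallInst *Call) {
      unsigned KindID = M.getContext().getMDKindID("my.example.tag");
      llvm::MDNode *Empty =
        llvm::MDNode::get(M.getContext(), llvm::ArrayRef<llvm::Value*>());
      Call->setMetadata(KindID, Empty);
    }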
+  ImpreciseReleaseMDKind =
+    M.getContext().getMDKindID("clang.imprecise_release");
+  CopyOnEscapeMDKind =
+    M.getContext().getMDKindID("clang.arc.copy_on_escape");
+  NoObjCARCExceptionsMDKind =
+    M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+
+  // Intuitively, objc_retain and others are nocapture, however in practice
+  // they are not, because they return their argument value. And objc_release
+  // calls finalizers which can have arbitrary side effects.
+
+  // These are initialized lazily.
+  RetainRVCallee = 0;
+  AutoreleaseRVCallee = 0;
+  ReleaseCallee = 0;
+  RetainCallee = 0;
+  RetainBlockCallee = 0;
+  AutoreleaseCallee = 0;
+
+  return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
+  Changed = false;
+
+  DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+
+  PA.setAA(&getAnalysis());
+
+  // This pass performs several distinct transformations. As a compile-time aid
+  // when compiling code that isn't ObjC, skip these if the relevant ObjC
+  // library functions aren't declared.
+
+  // Preliminary optimizations. This also computes UsedInThisFunction.
+  OptimizeIndividualCalls(F);
+
+  // Optimizations for weak pointers.
+  if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+                            (1 << IC_LoadWeakRetained) |
+                            (1 << IC_StoreWeak) |
+                            (1 << IC_InitWeak) |
+                            (1 << IC_CopyWeak) |
+                            (1 << IC_MoveWeak) |
+                            (1 << IC_DestroyWeak)))
+    OptimizeWeakCalls(F);
+
+  // Optimizations for retain+release pairs.
+  if (UsedInThisFunction & ((1 << IC_Retain) |
+                            (1 << IC_RetainRV) |
+                            (1 << IC_RetainBlock)))
+    if (UsedInThisFunction & (1 << IC_Release))
+      // Run OptimizeSequences until it either stops making changes or
+      // no retain+release pair nesting is detected.
+      while (OptimizeSequences(F)) {}
+
+  // Optimizations if objc_autorelease is used.
+  if (UsedInThisFunction & ((1 << IC_Autorelease) |
+                            (1 << IC_AutoreleaseRV)))
+    OptimizeReturns(F);
+
+  DEBUG(dbgs() << "\n");
+
+  return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+  PA.clear();
+}
+
+/// @}
+///
+/// \defgroup ARCContract ARC Contraction.
+/// @{
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Operator.h"
+
+STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+
+namespace {
+  /// \brief Late ARC optimizations
+  ///
+  /// These change the IR in a way that makes it difficult to be analyzed by
+  /// ObjCARCOpt, so it's run late.
+  class ObjCARCContract : public FunctionPass {
+    bool Changed;
+    AliasAnalysis *AA;
+    DominatorTree *DT;
+    ProvenanceAnalysis PA;
+
+    /// A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// Declarations for ObjC runtime functions, for use in creating calls to
+    /// them. These are initialized lazily to avoid cluttering up the Module
+    /// with unused declarations.
+
+    /// Declaration for objc_storeStrong().
+    Constant *StoreStrongCallee;
+    /// Declaration for objc_retainAutorelease().
+    Constant *RetainAutoreleaseCallee;
+    /// Declaration for objc_retainAutoreleaseReturnValue().
+    Constant *RetainAutoreleaseRVCallee;
+
+    /// The inline asm string to insert between calls and RetainRV calls to make
+    /// the optimization work on targets which need it.
+    const MDString *RetainRVMarker;
+
+    /// The set of inserted objc_storeStrong calls.
If at the end of walking the + /// function we have found no alloca instructions, these calls can be marked + /// "tail". + SmallPtrSet StoreStrongCalls; + + Constant *getStoreStrongCallee(Module *M); + Constant *getRetainAutoreleaseCallee(Module *M); + Constant *getRetainAutoreleaseRVCallee(Module *M); + + bool ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet + &DependingInstructions, + SmallPtrSet + &Visited); + + void ContractRelease(Instruction *Release, + inst_iterator &Iter); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + public: + static char ID; + ObjCARCContract() : FunctionPass(ID) { + initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCContract::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) + +Pass *llvm::createObjCARCContractPass() { + return new ObjCARCContract(); +} + +void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); +} + +Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { + if (!StoreStrongCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = { I8XX, I8X }; + + AttributeSet Attr = AttributeSet() + .addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind) + .addAttribute(M->getContext(), 1, Attribute::NoCapture); + + StoreStrongCallee = + M->getOrInsertFunction( + "objc_storeStrong", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attr); + } + return StoreStrongCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { + if (!RetainAutoreleaseCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainAutoreleaseCallee = + M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); + } + return RetainAutoreleaseCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { + if (!RetainAutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainAutoreleaseRVCallee = + M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, + Attribute); + } + return RetainAutoreleaseRVCallee; +} + +/// Merge an autorelease with a retain into a fused call. 
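[Annotation] The three callee getters above share one memoization shape: the declaration is created on first use, so modules that never need it stay uncluttered, exactly as the comment on the lazily initialized members says. Schematically (a sketch, not the pass's code):

    // Generic create-on-first-use slot; Make() builds the declaration.
    template <class DeclT, class MakeFn>
    static DeclT *LazyDecl(DeclT *&Slot, MakeFn Make) {
      if (!Slot)
        Slot = Make(); // first use: materialize the declaration
      return Slot;     // later uses: reuse the cached pointer
    }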
+bool +ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet + &DependingInstructions, + SmallPtrSet + &Visited) { + const Value *Arg = GetObjCArg(Autorelease); + + // Check that there are no instructions between the retain and the autorelease + // (such as an autorelease_pop) which may change the count. + CallInst *Retain = 0; + if (Class == IC_AutoreleaseRV) + FindDependencies(RetainAutoreleaseRVDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + else + FindDependencies(RetainAutoreleaseDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + + Visited.clear(); + if (DependingInstructions.size() != 1) { + DependingInstructions.clear(); + return false; + } + + Retain = dyn_cast_or_null(*DependingInstructions.begin()); + DependingInstructions.clear(); + + if (!Retain || + GetBasicInstructionClass(Retain) != IC_Retain || + GetObjCArg(Retain) != Arg) + return false; + + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing " + "retain/autorelease. Erasing: " << *Autorelease << "\n" + " Old Retain: " + << *Retain << "\n"); + + if (Class == IC_AutoreleaseRV) + Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); + else + Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); + + DEBUG(dbgs() << " New Retain: " + << *Retain << "\n"); + + EraseInstruction(Autorelease); + return true; +} + +/// Attempt to merge an objc_release with a store, load, and objc_retain to form +/// an objc_storeStrong. This can be a little tricky because the instructions +/// don't always appear in order, and there may be unrelated intervening +/// instructions. +void ObjCARCContract::ContractRelease(Instruction *Release, + inst_iterator &Iter) { + LoadInst *Load = dyn_cast(GetObjCArg(Release)); + if (!Load || !Load->isSimple()) return; + + // For now, require everything to be in one basic block. + BasicBlock *BB = Release->getParent(); + if (Load->getParent() != BB) return; + + // Walk down to find the store and the release, which may be in either order. + BasicBlock::iterator I = Load, End = BB->end(); + ++I; + AliasAnalysis::Location Loc = AA->getLocation(Load); + StoreInst *Store = 0; + bool SawRelease = false; + for (; !Store || !SawRelease; ++I) { + if (I == End) + return; + + Instruction *Inst = I; + if (Inst == Release) { + SawRelease = true; + continue; + } + + InstructionClass Class = GetBasicInstructionClass(Inst); + + // Unrelated retains are harmless. + if (IsRetain(Class)) + continue; + + if (Store) { + // The store is the point where we're going to put the objc_storeStrong, + // so make sure there are no uses after it. + if (CanUse(Inst, Load, PA, Class)) + return; + } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { + // We are moving the load down to the store, so check for anything + // else which writes to the memory between the load and the store. + Store = dyn_cast(Inst); + if (!Store || !Store->isSimple()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + } + } + + Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); + + // Walk up to find the retain. 
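[Annotation] The downward walk in ContractRelease above has an unusual loop condition: it terminates only once both the store and the release have been seen, in whichever order they occur, and it fails if the block ends first. That shape in isolation (a sketch with hypothetical predicates):

    // Keep scanning until both events are found, in either order; report
    // failure if the range is exhausted first.
    template <class IterT, class Pred1T, class Pred2T>
    static bool ScanUntilBoth(IterT I, IterT End, Pred1T SawStore,
                              Pred2T SawRelease) {
      bool Store = false, Release = false;
      for (; !(Store && Release); ++I) {
        if (I == End)
          return false;       // ran off the block: pattern not present
        Store   |= SawStore(*I);
        Release |= SawRelease(*I);
      }
      return true;
    }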
+ I = Store; + BasicBlock::iterator Begin = BB->begin(); + while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) + --I; + Instruction *Retain = I; + if (GetBasicInstructionClass(Retain) != IC_Retain) return; + if (GetObjCArg(Retain) != New) return; + + Changed = true; + ++NumStoreStrongs; + + LLVMContext &C = Release->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + + Value *Args[] = { Load->getPointerOperand(), New }; + if (Args[0]->getType() != I8XX) + Args[0] = new BitCastInst(Args[0], I8XX, "", Store); + if (Args[1]->getType() != I8X) + Args[1] = new BitCastInst(Args[1], I8X, "", Store); + CallInst *StoreStrong = + CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), + Args, "", Store); + StoreStrong->setDoesNotThrow(); + StoreStrong->setDebugLoc(Store->getDebugLoc()); + + // We can't set the tail flag yet, because we haven't yet determined + // whether there are any escaping allocas. Remember this call, so that + // we can set the tail flag once we know it's safe. + StoreStrongCalls.insert(StoreStrong); + + if (&*Iter == Store) ++Iter; + Store->eraseFromParent(); + Release->eraseFromParent(); + EraseInstruction(Retain); + if (Load->use_empty()) + Load->eraseFromParent(); +} + +bool ObjCARCContract::doInitialization(Module &M) { + // If nothing in the Module uses ARC, don't do anything. + Run = ModuleHasARC(M); + if (!Run) + return false; + + // These are initialized lazily. + StoreStrongCallee = 0; + RetainAutoreleaseCallee = 0; + RetainAutoreleaseRVCallee = 0; + + // Initialize RetainRVMarker. + RetainRVMarker = 0; + if (NamedMDNode *NMD = + M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) + if (NMD->getNumOperands() == 1) { + const MDNode *N = NMD->getOperand(0); + if (N->getNumOperands() == 1) + if (const MDString *S = dyn_cast(N->getOperand(0))) + RetainRVMarker = S; + } + + return false; +} + +bool ObjCARCContract::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + AA = &getAnalysis(); + DT = &getAnalysis(); + + PA.setAA(&getAnalysis()); + + // Track whether it's ok to mark objc_storeStrong calls with the "tail" + // keyword. Be conservative if the function has variadic arguments. + // It seems that functions which "return twice" are also unsafe for the + // "tail" argument, because they are setjmp, which could need to + // return to an earlier stack state. + bool TailOkForStoreStrongs = !F.isVarArg() && + !F.callsFunctionThatReturnsTwice(); + + // For ObjC library calls which return their argument, replace uses of the + // argument with uses of the call return value, if it dominates the use. This + // reduces register pressure. + SmallPtrSet DependingInstructions; + SmallPtrSet Visited; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); + + // Only these library routines return their argument. In particular, + // objc_retainBlock does not necessarily return its argument. 
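[Annotation] Note the deferral pattern set up above for the tail flag: objc_storeStrong calls are collected in StoreStrongCalls during the scan, and only marked tail once the whole function is known to be hazard-free. A self-contained sketch of that two-phase shape (toy type; it assumes only the CallInst::setTailCall API already used in this patch):

    #include <cstddef>
    #include <vector>
    #include "llvm/IR/Instructions.h"

    // Collect candidates now; decide once, at the end, whether to mark them.
    struct DeferredTailMarker {
      std::vector<llvm::CallInst *> Calls;
      bool Safe;
      DeferredTailMarker() : Safe(true) {}
      void Note(llvm::CallInst *CI) { Calls.push_back(CI); }
      void SawHazard() { Safe = false; } // e.g. an alloca or vararg use
      void Finish() {
        if (!Safe)
          return;                        // conservatively leave calls alone
        for (std::size_t i = 0, e = Calls.size(); i != e; ++i)
          Calls[i]->setTailCall();
      }
    };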
+ InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + break; + case IC_Autorelease: + case IC_AutoreleaseRV: + if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) + continue; + break; + case IC_RetainRV: { + // If we're compiling for a target which needs a special inline-asm + // marker to do the retainAutoreleasedReturnValue optimization, + // insert it now. + if (!RetainRVMarker) + break; + BasicBlock::iterator BBI = Inst; + BasicBlock *InstParent = Inst->getParent(); + + // Step up to see if the call immediately precedes the RetainRV call. + // If it's an invoke, we have to cross a block boundary. And we have + // to carefully dodge no-op instructions. + do { + if (&*BBI == InstParent->begin()) { + BasicBlock *Pred = InstParent->getSinglePredecessor(); + if (!Pred) + goto decline_rv_optimization; + BBI = Pred->getTerminator(); + break; + } + --BBI; + } while (isNoopInstruction(BBI)); + + if (&*BBI == GetObjCArg(Inst)) { + DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " + "retainAutoreleasedReturnValue optimization.\n"); + Changed = true; + InlineAsm *IA = + InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), + /*isVarArg=*/false), + RetainRVMarker->getString(), + /*Constraints=*/"", /*hasSideEffects=*/true); + CallInst::Create(IA, "", Inst); + } + decline_rv_optimization: + break; + } + case IC_InitWeak: { + // objc_initWeak(p, null) => *p = null + CallInst *CI = cast(Inst); + if (isNullOrUndef(CI->getArgOperand(1))) { + Value *Null = + ConstantPointerNull::get(cast(CI->getType())); + Changed = true; + new StoreInst(Null, CI->getArgOperand(0), CI); + + DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" + << " New = " << *Null << "\n"); + + CI->replaceAllUsesWith(Null); + CI->eraseFromParent(); + } + continue; + } + case IC_Release: + ContractRelease(Inst, I); + continue; + case IC_User: + // Be conservative if the function has any alloca instructions. + // Technically we only care about escaping alloca instructions, + // but this is sufficient to handle some interesting cases. + if (isa(Inst)) + TailOkForStoreStrongs = false; + continue; + default: + continue; + } + + DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); + + // Don't use GetObjCArg because we don't want to look through bitcasts + // and such; to do the replacement, the argument must have type i8*. + const Value *Arg = cast(Inst)->getArgOperand(0); + for (;;) { + // If we're compiling bugpointed code, don't get in trouble. + if (!isa(Arg) && !isa(Arg)) + break; + // Look through the uses of the pointer. + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ) { + Use &U = UI.getUse(); + unsigned OperandNo = UI.getOperandNo(); + ++UI; // Increment UI now, because we may unlink its element. + + // If the call's return value dominates a use of the call's argument + // value, rewrite the use to use the return value. We check for + // reachability here because an unreachable call is considered to + // trivially dominate itself, which would lead us to rewriting its + // argument in terms of its return value, which would lead to + // infinite loops in GetObjCArg. 
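[Annotation] The dominance-gated rewrite described in the comment above is implemented just below; its PHI case is worth isolating, since a single bitcast placed in the predecessor block is reused for every incoming edge from that block. A sketch of that inner loop (standalone; it uses only PHINode APIs that appear in code of this era):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Redirect every incoming edge from Pred to NewV; one replacement value
    // (e.g. a single bitcast) serves all such edges.
    static void RewriteIncomingFromBlock(llvm::PHINode *PHI,
                                         llvm::BasicBlock *Pred,
                                         llvm::Value *NewV) {
      for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
        if (PHI->getIncomingBlock(i) == Pred)
          PHI->setIncomingValue(i, NewV);
    }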
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { + Changed = true; + Instruction *Replacement = Inst; + Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast(U.getUser())) { + // For PHI nodes, insert the bitcast in the predecessor block. + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + // While we're here, rewrite all edges for this PHI, rather + // than just one use at a time, to minimize the number of + // bitcasts we emit. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + cast(U.getUser())); + U.set(Replacement); + } + } + } + + // If Arg is a no-op casted pointer, strip one level of casts and iterate. + if (const BitCastInst *BI = dyn_cast(Arg)) + Arg = BI->getOperand(0); + else if (isa(Arg) && + cast(Arg)->hasAllZeroIndices()) + Arg = cast(Arg)->getPointerOperand(); + else if (isa(Arg) && + !cast(Arg)->mayBeOverridden()) + Arg = cast(Arg)->getAliasee(); + else + break; + } + } + + // If this function has no escaping allocas or suspicious vararg usage, + // objc_storeStrong calls can be marked with the "tail" keyword. + if (TailOkForStoreStrongs) + for (SmallPtrSet::iterator I = StoreStrongCalls.begin(), + E = StoreStrongCalls.end(); I != E; ++I) + (*I)->setTailCall(); + StoreStrongCalls.clear(); + + return Changed; +} + +/// @} +/// diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b3fc6e3..fd55e08 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts LoopUnswitch.cpp LowerAtomic.cpp MemCpyOptimizer.cpp - ObjCARC.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp deleted file mode 100644 index 0dab0ff..0000000 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ /dev/null @@ -1,4568 +0,0 @@ -//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file defines ObjC ARC optimizations. ARC stands for Automatic -/// Reference Counting and is a system for managing reference counts for objects -/// in Objective C. -/// -/// The optimizations performed include elimination of redundant, partially -/// redundant, and inconsequential reference count operations, elimination of -/// redundant weak pointer operations, pattern-matching and replacement of -/// low-level operations into higher-level operations, and numerous minor -/// simplifications. -/// -/// This file also defines a simple ARC-aware AliasAnalysis. -/// -/// WARNING: This file knows about certain library functions. It recognizes them -/// by name, and hardwires knowledge of their semantics. -/// -/// WARNING: This file knows about how certain Objective-C library functions are -/// used. 
Naive LLVM IR transformations which would otherwise be -/// behavior-preserving may break these assumptions. -/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "objc-arc" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -/// \brief A handy option to enable/disable all optimizations in this file. -static cl::opt EnableARCOpts("enable-objc-arc-opts", cl::init(true)); - -/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. -/// @{ - -namespace { - /// \brief An associative container with fast insertion-order (deterministic) - /// iteration over its elements. Plus the special blot operation. - template - class MapVector { - /// Map keys to indices in Vector. - typedef DenseMap MapTy; - MapTy Map; - - typedef std::vector > VectorTy; - /// Keys and values. - VectorTy Vector; - - public: - typedef typename VectorTy::iterator iterator; - typedef typename VectorTy::const_iterator const_iterator; - iterator begin() { return Vector.begin(); } - iterator end() { return Vector.end(); } - const_iterator begin() const { return Vector.begin(); } - const_iterator end() const { return Vector.end(); } - -#ifdef XDEBUG - ~MapVector() { - assert(Vector.size() >= Map.size()); // May differ due to blotting. - for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); - I != E; ++I) { - assert(I->second < Vector.size()); - assert(Vector[I->second].first == I->first); - } - for (typename VectorTy::const_iterator I = Vector.begin(), - E = Vector.end(); I != E; ++I) - assert(!I->first || - (Map.count(I->first) && - Map[I->first] == size_t(I - Vector.begin()))); - } -#endif - - ValueT &operator[](const KeyT &Arg) { - std::pair Pair = - Map.insert(std::make_pair(Arg, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(std::make_pair(Arg, ValueT())); - return Vector[Num].second; - } - return Vector[Pair.first->second].second; - } - - std::pair - insert(const std::pair &InsertPair) { - std::pair Pair = - Map.insert(std::make_pair(InsertPair.first, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(InsertPair); - return std::make_pair(Vector.begin() + Num, true); - } - return std::make_pair(Vector.begin() + Pair.first->second, false); - } - - const_iterator find(const KeyT &Key) const { - typename MapTy::const_iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - /// This is similar to erase, but instead of removing the element from the - /// vector, it just zeros out the key in the vector. This leaves iterators - /// intact, but clients must be prepared for zeroed-out keys when iterating. - void blot(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return; - Vector[It->second].first = KeyT(); - Map.erase(It); - } - - void clear() { - Map.clear(); - Vector.clear(); - } - }; -} - -/// @} -/// -/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. 
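[Annotation] A note on the MapVector shown above: blot() exists because erase() on the vector would shift elements and invalidate iterators held by a caller mid-walk; blotting zeroes the key in place instead, which is why PerformCodePlacement skips null keys. A miniature standalone model of the idea:

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Standalone miniature of blot(): erase from the index map but only
    // null out the key in the vector, so in-flight iterators stay valid.
    struct BlotMap {
      std::map<std::string, std::size_t> Index;
      std::vector<std::pair<std::string, int> > Entries;
      void insert(const std::string &K, int V) {
        Index[K] = Entries.size();
        Entries.push_back(std::make_pair(K, V));
      }
      void blot(const std::string &K) {
        std::map<std::string, std::size_t>::iterator It = Index.find(K);
        if (It == Index.end()) return;
        Entries[It->second].first.clear(); // key zeroed; slot remains
        Index.erase(It);
      }
    };

    int main() {
      BlotMap M;
      M.insert("a", 1); M.insert("b", 2);
      M.blot("a");
      assert(M.Entries.size() == 2 && M.Entries[0].first.empty());
      return 0;
    }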
-/// @{ - -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Transforms/Utils/Local.h" - -namespace { - /// \enum InstructionClass - /// \brief A simple classification for instructions. - enum InstructionClass { - IC_Retain, ///< objc_retain - IC_RetainRV, ///< objc_retainAutoreleasedReturnValue - IC_RetainBlock, ///< objc_retainBlock - IC_Release, ///< objc_release - IC_Autorelease, ///< objc_autorelease - IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue - IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush - IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop - IC_NoopCast, ///< objc_retainedObject, etc. - IC_FusedRetainAutorelease, ///< objc_retainAutorelease - IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue - IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive) - IC_StoreWeak, ///< objc_storeWeak (primitive) - IC_InitWeak, ///< objc_initWeak (derived) - IC_LoadWeak, ///< objc_loadWeak (derived) - IC_MoveWeak, ///< objc_moveWeak (derived) - IC_CopyWeak, ///< objc_copyWeak (derived) - IC_DestroyWeak, ///< objc_destroyWeak (derived) - IC_StoreStrong, ///< objc_storeStrong (derived) - IC_CallOrUser, ///< could call objc_release and/or "use" pointers - IC_Call, ///< could call objc_release - IC_User, ///< could "use" a pointer - IC_None ///< anything else - }; - - raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) - LLVM_ATTRIBUTE_USED; - raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) { - switch (Class) { - case IC_Retain: - return OS << "IC_Retain"; - case IC_RetainRV: - return OS << "IC_RetainRV"; - case IC_RetainBlock: - return OS << "IC_RetainBlock"; - case IC_Release: - return OS << "IC_Release"; - case IC_Autorelease: - return OS << "IC_Autorelease"; - case IC_AutoreleaseRV: - return OS << "IC_AutoreleaseRV"; - case IC_AutoreleasepoolPush: - return OS << "IC_AutoreleasepoolPush"; - case IC_AutoreleasepoolPop: - return OS << "IC_AutoreleasepoolPop"; - case IC_NoopCast: - return OS << "IC_NoopCast"; - case IC_FusedRetainAutorelease: - return OS << "IC_FusedRetainAutorelease"; - case IC_FusedRetainAutoreleaseRV: - return OS << "IC_FusedRetainAutoreleaseRV"; - case IC_LoadWeakRetained: - return OS << "IC_LoadWeakRetained"; - case IC_StoreWeak: - return OS << "IC_StoreWeak"; - case IC_InitWeak: - return OS << "IC_InitWeak"; - case IC_LoadWeak: - return OS << "IC_LoadWeak"; - case IC_MoveWeak: - return OS << "IC_MoveWeak"; - case IC_CopyWeak: - return OS << "IC_CopyWeak"; - case IC_DestroyWeak: - return OS << "IC_DestroyWeak"; - case IC_StoreStrong: - return OS << "IC_StoreStrong"; - case IC_CallOrUser: - return OS << "IC_CallOrUser"; - case IC_Call: - return OS << "IC_Call"; - case IC_User: - return OS << "IC_User"; - case IC_None: - return OS << "IC_None"; - } - llvm_unreachable("Unknown instruction class!"); - } -} - -/// \brief Test whether the given value is possible a retainable object pointer. -static bool IsPotentialRetainableObjPtr(const Value *Op) { - // Pointers to static or stack storage are not valid retainable object pointers. - if (isa(Op) || isa(Op)) - return false; - // Special arguments can not be a valid retainable object pointer. - if (const Argument *Arg = dyn_cast(Op)) - if (Arg->hasByValAttr() || - Arg->hasNestAttr() || - Arg->hasStructRetAttr()) - return false; - // Only consider values with pointer types. 
- // - // It seemes intuitive to exclude function pointer types as well, since - // functions are never retainable object pointers, however clang occasionally - // bitcasts retainable object pointers to function-pointer type temporarily. - PointerType *Ty = dyn_cast(Op->getType()); - if (!Ty) - return false; - // Conservatively assume anything else is a potential retainable object pointer. - return true; -} - -/// \brief Helper for GetInstructionClass. Determines what kind of construct CS -/// is. -static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (IsPotentialRetainableObjPtr(*I)) - return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; - - return CS.onlyReadsMemory() ? IC_None : IC_Call; -} - -/// \brief Determine if F is one of the special known Functions. If it isn't, -/// return IC_CallOrUser. -static InstructionClass GetFunctionClass(const Function *F) { - Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - - // No arguments. - if (AI == AE) - return StringSwitch(F->getName()) - .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) - .Default(IC_CallOrUser); - - // One argument. - const Argument *A0 = AI++; - if (AI == AE) - // Argument is a pointer. - if (PointerType *PTy = dyn_cast(A0->getType())) { - Type *ETy = PTy->getElementType(); - // Argument is i8*. - if (ETy->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_retain", IC_Retain) - .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) - .Case("objc_retainBlock", IC_RetainBlock) - .Case("objc_release", IC_Release) - .Case("objc_autorelease", IC_Autorelease) - .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) - .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) - .Case("objc_retainedObject", IC_NoopCast) - .Case("objc_unretainedObject", IC_NoopCast) - .Case("objc_unretainedPointer", IC_NoopCast) - .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) - .Default(IC_CallOrUser); - - // Argument is i8** - if (PointerType *Pte = dyn_cast(ETy)) - if (Pte->getElementType()->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_loadWeakRetained", IC_LoadWeakRetained) - .Case("objc_loadWeak", IC_LoadWeak) - .Case("objc_destroyWeak", IC_DestroyWeak) - .Default(IC_CallOrUser); - } - - // Two arguments, first is i8**. - const Argument *A1 = AI++; - if (AI == AE) - if (PointerType *PTy = dyn_cast(A0->getType())) - if (PointerType *Pte = dyn_cast(PTy->getElementType())) - if (Pte->getElementType()->isIntegerTy(8)) - if (PointerType *PTy1 = dyn_cast(A1->getType())) { - Type *ETy1 = PTy1->getElementType(); - // Second argument is i8* - if (ETy1->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_storeWeak", IC_StoreWeak) - .Case("objc_initWeak", IC_InitWeak) - .Case("objc_storeStrong", IC_StoreStrong) - .Default(IC_CallOrUser); - // Second argument is i8**. - if (PointerType *Pte1 = dyn_cast(ETy1)) - if (Pte1->getElementType()->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_moveWeak", IC_MoveWeak) - .Case("objc_copyWeak", IC_CopyWeak) - .Default(IC_CallOrUser); - } - - // Anything else. - return IC_CallOrUser; -} - -/// \brief Determine what kind of construct V is. 
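[Annotation] GetFunctionClass above keys off arity and pointer depth first, then dispatches on the callee's name with StringSwitch. A minimal standalone use of that idiom (toy enum and categories, not the pass's classification):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    enum ToyClass { TC_Retain, TC_Release, TC_Other };

    // Name-based dispatch, the same shape as GetFunctionClass above.
    static ToyClass ClassifyByName(llvm::StringRef Name) {
      return llvm::StringSwitch<ToyClass>(Name)
        .Case("objc_retain", TC_Retain)
        .Case("objc_release", TC_Release)
        .Default(TC_Other);
    }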
-static InstructionClass GetInstructionClass(const Value *V) { - if (const Instruction *I = dyn_cast(V)) { - // Any instruction other than bitcast and gep with a pointer operand have a - // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer - // to a subsequent use, rather than using it themselves, in this sense. - // As a short cut, several other opcodes are known to have no pointer - // operands of interest. And ret is never followed by a release, so it's - // not interesting to examine. - switch (I->getOpcode()) { - case Instruction::Call: { - const CallInst *CI = cast(I); - // Check for calls to special functions. - if (const Function *F = CI->getCalledFunction()) { - InstructionClass Class = GetFunctionClass(F); - if (Class != IC_CallOrUser) - return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. - switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: case Intrinsic::frameaddress: - case Intrinsic::stacksave: case Intrinsic::stackrestore: - case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: - case Intrinsic::objectsize: case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. - return IC_None; - default: - break; - } - } - return GetCallSiteClass(CI); - } - case Instruction::Invoke: - return GetCallSiteClass(cast(I)); - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::Select: case Instruction::PHI: - case Instruction::Ret: case Instruction::Br: - case Instruction::Switch: case Instruction::IndirectBr: - case Instruction::Alloca: case Instruction::VAArg: - case Instruction::Add: case Instruction::FAdd: - case Instruction::Sub: case Instruction::FSub: - case Instruction::Mul: case Instruction::FMul: - case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: - case Instruction::SRem: case Instruction::URem: case Instruction::FRem: - case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: - case Instruction::IntToPtr: case Instruction::FCmp: - case Instruction::FPTrunc: case Instruction::FPExt: - case Instruction::FPToUI: case Instruction::FPToSI: - case Instruction::UIToFP: case Instruction::SIToFP: - case Instruction::InsertElement: case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::ExtractValue: - break; - case Instruction::ICmp: - // Comparing a pointer with null, or any other constant, isn't an - // interesting use, because we don't care what the pointer points to, or - // about the values of any other dynamic reference-counted pointers. - if (IsPotentialRetainableObjPtr(I->getOperand(1))) - return IC_User; - break; - default: - // For anything else, check all the operands. 
- // Note that this includes both operands of a Store: while the first - // operand isn't actually being dereferenced, it is being stored to - // memory where we can no longer track who might read it and dereference - // it, so we have to consider it potentially used. - for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); - OI != OE; ++OI) - if (IsPotentialRetainableObjPtr(*OI)) - return IC_User; - } - } - - // Otherwise, it's totally inert for ARC purposes. - return IC_None; -} - -/// \brief Determine which objc runtime call instruction class V belongs to. -/// -/// This is similar to GetInstructionClass except that it only detects objc -/// runtime calls. This allows it to be faster. -/// -static InstructionClass GetBasicInstructionClass(const Value *V) { - if (const CallInst *CI = dyn_cast(V)) { - if (const Function *F = CI->getCalledFunction()) - return GetFunctionClass(F); - // Otherwise, be conservative. - return IC_CallOrUser; - } - - // Otherwise, be conservative. - return isa(V) ? IC_CallOrUser : IC_User; -} - -/// \brief Test if the given class is objc_retain or equivalent. -static bool IsRetain(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV; -} - -/// \brief Test if the given class is objc_autorelease or equivalent. -static bool IsAutorelease(InstructionClass Class) { - return Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// \brief Test if the given class represents instructions which return their -/// argument verbatim. -static bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || - Class == IC_NoopCast; -} - -/// \brief Test if the given class represents instructions which do nothing if -/// passed a null pointer. -static bool IsNoopOnNull(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock; -} - -/// \brief Test if the given class represents instructions which are always safe -/// to mark with the "tail" keyword. -static bool IsAlwaysTail(InstructionClass Class) { - // IC_RetainBlock may be given a stack argument. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_AutoreleaseRV; -} - -/// \brief Test if the given class represents instructions which are never safe -/// to mark with the "tail" keyword. -static bool IsNeverTail(InstructionClass Class) { - /// It is never safe to tail call objc_autorelease since by tail calling - /// objc_autorelease, we also tail call -[NSObject autorelease] which supports - /// fast autoreleasing causing our object to be potentially reclaimed from the - /// autorelease pool which violates the semantics of __autoreleasing types in - /// ARC. - return Class == IC_Autorelease; -} - -/// \brief Test if the given class represents instructions which are always safe -/// to mark with the nounwind attribute. -static bool IsNoThrow(InstructionClass Class) { - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. 
- return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_AutoreleasepoolPush || - Class == IC_AutoreleasepoolPop; -} - -/// \brief Erase the given instruction. -/// -/// Many ObjC calls return their argument verbatim, -/// so if it's such a call and the return value has users, replace them with the -/// argument value. -/// -static void EraseInstruction(Instruction *CI) { - Value *OldArg = cast(CI)->getArgOperand(0); - - bool Unused = CI->use_empty(); - - if (!Unused) { - // Replace the return value with the argument. - assert(IsForwarding(GetBasicInstructionClass(CI)) && - "Can't delete non-forwarding instruction with users!"); - CI->replaceAllUsesWith(OldArg); - } - - CI->eraseFromParent(); - - if (Unused) - RecursivelyDeleteTriviallyDeadInstructions(OldArg); -} - -/// \brief This is a wrapper around getUnderlyingObject which also knows how to -/// look through objc_retain and objc_autorelease calls, which we know to return -/// their argument verbatim. -static const Value *GetUnderlyingObjCPtr(const Value *V) { - for (;;) { - V = GetUnderlyingObject(V); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast(V)->getArgOperand(0); - } - - return V; -} - -/// \brief This is a wrapper around Value::stripPointerCasts which also knows -/// how to look through objc_retain and objc_autorelease calls, which we know to -/// return their argument verbatim. -static const Value *StripPointerCastsAndObjCCalls(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast(V)->getArgOperand(0); - } - return V; -} - -/// \brief This is a wrapper around Value::stripPointerCasts which also knows -/// how to look through objc_retain and objc_autorelease calls, which we know to -/// return their argument verbatim. -static Value *StripPointerCastsAndObjCCalls(Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast(V)->getArgOperand(0); - } - return V; -} - -/// \brief Assuming the given instruction is one of the special calls such as -/// objc_retain or objc_release, return the argument value, stripped of no-op -/// casts and forwarding calls. -static Value *GetObjCArg(Value *Inst) { - return StripPointerCastsAndObjCCalls(cast(Inst)->getArgOperand(0)); -} - -/// \brief Return true if this value refers to a distinct and identifiable -/// object. -/// -/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses -/// special knowledge of ObjC conventions. -static bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". - // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa(V) || isa(V) || - isa(V) || isa(V) || - isa(V)) - return true; - - if (const LoadInst *LI = dyn_cast(V)) { - const Value *Pointer = - StripPointerCastsAndObjCCalls(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. 
- if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || - Name.startswith("\01L_OBJC_METH_VAR_NAME_") || - Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - } - } - - return false; -} - -/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon -/// as it finds a value with multiple uses. -static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { - if (Arg->hasOneUse()) { - if (const BitCastInst *BC = dyn_cast(Arg)) - return FindSingleUseIdentifiedObject(BC->getOperand(0)); - if (const GetElementPtrInst *GEP = dyn_cast(Arg)) - if (GEP->hasAllZeroIndices()) - return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); - if (IsForwarding(GetBasicInstructionClass(Arg))) - return FindSingleUseIdentifiedObject( - cast(Arg)->getArgOperand(0)); - if (!IsObjCIdentifiedObject(Arg)) - return 0; - return Arg; - } - - // If we found an identifiable object but it has multiple uses, but they are - // trivial uses, we can still consider this to be a single-use value. - if (IsObjCIdentifiedObject(Arg)) { - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ++UI) { - const User *U = *UI; - if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) - return 0; - } - - return Arg; - } - - return 0; -} - -/// \brief Test if the given module looks interesting to run ARC optimization -/// on. -static bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); -} - -/// \brief Test whether the given pointer, which is an Objective C block -/// pointer, does not "escape". -/// -/// This differs from regular escape analysis in that a use as an -/// argument to a call is not considered an escape. -/// -static bool DoesObjCBlockEscape(const Value *BlockPtr) { - - DEBUG(dbgs() << "DoesObjCBlockEscape: Target: " << *BlockPtr << "\n"); - - // Walk the def-use chains. - SmallVector Worklist; - Worklist.push_back(BlockPtr); - - // Ensure we do not visit any value twice. - SmallPtrSet VisitedSet; - - do { - const Value *V = Worklist.pop_back_val(); - - DEBUG(dbgs() << "DoesObjCBlockEscape: Visiting: " << *V << "\n"); - - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const User *UUser = *UI; - - DEBUG(dbgs() << "DoesObjCBlockEscape: User: " << *UUser << "\n"); - - // Special - Use by a call (callee or argument) is not considered - // to be an escape. - switch (GetBasicInstructionClass(UUser)) { - case IC_StoreWeak: - case IC_InitWeak: - case IC_StoreStrong: - case IC_Autorelease: - case IC_AutoreleaseRV: { - DEBUG(dbgs() << "DoesObjCBlockEscape: User copies pointer arguments. " - "Block Escapes!\n"); - // These special functions make copies of their pointer arguments. 
- return true; - } - case IC_User: - case IC_None: - // Use by an instruction which copies the value is an escape if the - // result is an escape. - if (isa(UUser) || isa(UUser) || - isa(UUser) || isa(UUser)) { - - if (!VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes " - "if result escapes. Adding to list.\n"); - Worklist.push_back(UUser); - } else { - DEBUG(dbgs() << "DoesObjCBlockEscape: Already visited node.\n"); - } - continue; - } - // Use by a load is not an escape. - if (isa(UUser)) - continue; - // Use by a store is not an escape if the use is the address. - if (const StoreInst *SI = dyn_cast(UUser)) - if (V != SI->getValueOperand()) - continue; - break; - default: - // Regular calls and other stuff are not considered escapes. - continue; - } - // Otherwise, conservatively assume an escape. - DEBUG(dbgs() << "DoesObjCBlockEscape: Assuming block escapes.\n"); - return true; - } - } while (!Worklist.empty()); - - // No escapes found. - DEBUG(dbgs() << "DoesObjCBlockEscape: Block does not escape.\n"); - return false; -} - -/// @} -/// -/// \defgroup ARCAA Extends alias analysis using ObjC specific knowledge. -/// @{ - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" - -namespace { - /// \brief This is a simple alias analysis implementation that uses knowledge - /// of ARC constructs to answer queries. - /// - /// TODO: This class could be generalized to know about other ObjC-specific - /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing - /// even though their offsets are dynamic. - class ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - ObjCARCAliasAnalysis() : ImmutablePass(ID) { - initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - private: - virtual void initializePass() { - InitializeAliasAnalysis(this); - } - - /// This method is used when a pass implements an analysis interface through - /// multiple inheritance. If needed, it should override this to adjust the - /// this pointer as needed for the specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *PI) { - if (PI == &AliasAnalysis::ID) - return static_cast(this); - return this; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Location &LocA, const Location &LocB); - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); - virtual ModRefBehavior getModRefBehavior(const Function *F); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); - }; -} // End of anonymous namespace - -// Register this pass... 
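[Annotation] DoesObjCBlockEscape above is a standard worklist walk over the def-use graph with a visited set; only the escape criteria are ARC-specific. The traversal skeleton in isolation, with hypothetical callbacks standing in for the user lookup and the escape test:

    #include <cstddef>
    #include <set>
    #include <vector>

    // Worklist walk seeded with the root value; Visited prevents revisiting.
    template <class T, class UsersFn, class EscFn>
    static bool AnyUseEscapes(T Root, UsersFn UsersOf, EscFn EscapesDirectly) {
      std::vector<T> Worklist(1, Root);
      std::set<T> Visited;
      do {
        T V = Worklist.back();
        Worklist.pop_back();
        std::vector<T> Users = UsersOf(V);
        for (std::size_t i = 0, e = Users.size(); i != e; ++i) {
          if (EscapesDirectly(Users[i]))
            return true;                  // conservative: report an escape
          if (Visited.insert(Users[i]).second)
            Worklist.push_back(Users[i]); // transparent use: chase its users
        }
      } while (!Worklist.empty());
      return false;
    }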
-char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasAnalysis::AliasResult -ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { - if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making a - // precise alias query. - const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); - const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); - AliasResult Result = - AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), - Location(SB, LocB.Size, LocB.TBAATag)); - if (Result != MayAlias) - return Result; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); - if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(Location(UA), Location(UB)); - // We can't use MustAlias or PartialAlias results here because - // GetUnderlyingObjCPtr may return an offsetted pointer value. - if (Result == NoAlias) - return NoAlias; - } - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return MayAlias; -} - -bool -ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, - bool OrLocal) { - if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making - // a precise alias query. - const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), - OrLocal)) - return true; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); - if (U != S) - return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return false; -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. - return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); - - switch (GetFunctionClass(F)) { - case IC_NoopCast: - return DoesNotAccessMemory; - default: - break; - } - - return AliasAnalysis::getModRefBehavior(F); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); - - switch (GetBasicInstructionClass(CS.getInstruction())) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_NoopCast: - case IC_AutoreleasepoolPush: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These functions don't access any memory visible to the compiler. 
- // Note that this doesn't include objc_retainBlock, because it updates - // pointers when it copies block data. - return NoModRef; - default: - break; - } - - return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); -} - -/// @} -/// -/// \defgroup ARCExpansion Early ARC Optimizations. -/// @{ - -#include "llvm/Support/InstIterator.h" -#include "llvm/Transforms/Scalar.h" - -namespace { - /// \brief Early ARC transformations. - class ObjCARCExpand : public FunctionPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - /// A flag indicating whether this optimization pass should run. - bool Run; - - public: - static char ID; - ObjCARCExpand() : FunctionPass(ID) { - initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCExpand::ID = 0; -INITIALIZE_PASS(ObjCARCExpand, - "objc-arc-expand", "ObjC ARC expansion", false, false) - -Pass *llvm::createObjCARCExpandPass() { - return new ObjCARCExpand(); -} - -void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -bool ObjCARCExpand::doInitialization(Module &M) { - Run = ModuleHasARC(M); - return false; -} - -bool ObjCARCExpand::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - bool Changed = false; - - DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n"); - - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - Instruction *Inst = &*I; - - DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n"); - - switch (GetBasicInstructionClass(Inst)) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: { - // These calls return their argument verbatim, as a low-level - // optimization. However, this makes high-level optimizations - // harder. Undo any uses of this optimization that the front-end - // emitted here. We'll redo them in the contract pass. - Changed = true; - Value *Value = cast(Inst)->getArgOperand(0); - DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n" - " New = " << *Value << "\n"); - Inst->replaceAllUsesWith(Value); - break; - } - default: - break; - } - } - - DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n"); - - return Changed; -} - -/// @} -/// -/// \defgroup ARCAPElim ARC Autorelease Pool Elimination. -/// @{ - -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Constants.h" - -namespace { - /// \brief Autorelease pool elimination. 
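[Editor's note: the pass declared below (see OptimizeBB further down) cancels an autorelease-pool push/pop pair inside one block when nothing in between may autorelease. A self-contained sketch of that single scan, over a hypothetical Op encoding:]

  #include <vector>

  enum OpKind { Push, Pop, MayAutorelease, Other };
  struct Op { OpKind Kind; int MatchingPush; bool Dead; };

  // One forward pass: remember the latest unmatched push; a pop that names
  // it, with no possible autorelease in between, deletes the pair.
  static bool zapPushPopPairs(std::vector<Op> &Block) {
    bool Changed = false;
    int LastPush = -1;
    for (size_t i = 0, e = Block.size(); i != e; ++i) {
      switch (Block[i].Kind) {
      case Push:
        LastPush = (int)i;
        break;
      case Pop:
        if (LastPush >= 0 && Block[i].MatchingPush == LastPush) {
          Block[LastPush].Dead = Block[i].Dead = true;
          Changed = true;
        }
        LastPush = -1;
        break;
      case MayAutorelease:
        LastPush = -1;  // an intervening call may autorelease: give up
        break;
      case Other:
        break;
      }
    }
    return Changed;
  }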
- class ObjCARCAPElim : public ModulePass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - - static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); - static bool OptimizeBB(BasicBlock *BB); - - public: - static char ID; - ObjCARCAPElim() : ModulePass(ID) { - initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCAPElim::ID = 0; -INITIALIZE_PASS(ObjCARCAPElim, - "objc-arc-apelim", - "ObjC ARC autorelease pool elimination", - false, false) - -Pass *llvm::createObjCARCAPElimPass() { - return new ObjCARCAPElim(); -} - -void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -/// Interprocedurally determine if calls made by the given call site can -/// possibly produce autoreleases. -bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { - if (const Function *Callee = CS.getCalledFunction()) { - if (Callee->isDeclaration() || Callee->mayBeOverridden()) - return true; - for (Function::const_iterator I = Callee->begin(), E = Callee->end(); - I != E; ++I) { - const BasicBlock *BB = I; - for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); - J != F; ++J) - if (ImmutableCallSite JCS = ImmutableCallSite(J)) - // This recursion depth limit is arbitrary. It's just great - // enough to cover known interesting testcases. - if (Depth < 3 && - !JCS.onlyReadsMemory() && - MayAutorelease(JCS, Depth + 1)) - return true; - } - return false; - } - - return true; -} - -bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { - bool Changed = false; - - Instruction *Push = 0; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPush: - Push = Inst; - break; - case IC_AutoreleasepoolPop: - // If this pop matches a push and nothing in between can autorelease, - // zap the pair. - if (Push && cast(Inst)->getArgOperand(0) == Push) { - Changed = true; - DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop " - "autorelease pair:\n" - " Pop: " << *Inst << "\n" - << " Push: " << *Push << "\n"); - Inst->eraseFromParent(); - Push->eraseFromParent(); - } - Push = 0; - break; - case IC_CallOrUser: - if (MayAutorelease(ImmutableCallSite(Inst))) - Push = 0; - break; - default: - break; - } - } - - return Changed; -} - -bool ObjCARCAPElim::runOnModule(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!ModuleHasARC(M)) - return false; - - // Find the llvm.global_ctors variable, as the first step in - // identifying the global constructors. In theory, unnecessary autorelease - // pools could occur anywhere, but in practice it's pretty rare. Global - // ctors are a place where autorelease pools get inserted automatically, - // so it's pretty common for them to be unnecessary, and it's pretty - // profitable to eliminate them. - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (!GV) - return false; - - assert(GV->hasDefinitiveInitializer() && - "llvm.global_ctors is uncooperative!"); - - bool Changed = false; - - // Dig the constructor functions out of GV's initializer. - ConstantArray *Init = cast(GV->getInitializer()); - for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); - OI != OE; ++OI) { - Value *Op = *OI; - // llvm.global_ctors is an array of pairs where the second members - // are constructor functions. 
-    Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
-    // If the user used a constructor function with the wrong signature and
-    // it got bitcasted or whatever, look the other way.
-    if (!F)
-      continue;
-    // Only look at function definitions.
-    if (F->isDeclaration())
-      continue;
-    // Only look at functions with one basic block.
-    if (llvm::next(F->begin()) != F->end())
-      continue;
-    // Ok, a single-block constructor function definition. Try to optimize it.
-    Changed |= OptimizeBB(F->begin());
-  }
-
-  return Changed;
-}
-
-/// @}
-///
-/// \defgroup ARCOpt ARC Optimization.
-/// @{
-
-// TODO: On code like this:
-//
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-// stuff_that_cannot_release()
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-//
-// The second retain and autorelease can be deleted.
-
-// TODO: It should be possible to delete
-// objc_autoreleasePoolPush and objc_autoreleasePoolPop
-// pairs if nothing is actually autoreleased between them. Also, autorelease
-// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
-// after inlining) can be turned into plain release calls.
-
-// TODO: Critical-edge splitting. If the optimal insertion point is
-// a critical edge, the current algorithm has to fail, because it doesn't
-// know how to split edges. It should be possible to make the optimizer
-// think in terms of edges, rather than blocks, and then split critical
-// edges on demand.
-
-// TODO: OptimizeSequences could be generalized to be interprocedural.
-
-// TODO: Recognize that a bunch of other objc runtime calls have
-// non-escaping arguments and non-releasing arguments, and may be
-// non-autoreleasing.
-
-// TODO: Sink autorelease calls as far as possible. Unfortunately we
-// usually can't sink them past other calls, which would be the main
-// case where it would be useful.
-
-// TODO: The pointer returned from objc_loadWeakRetained is retained.
-
-// TODO: Delete release+retain pairs (rare).
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/CFG.h"
-
-STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
-STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
-STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
-STATISTIC(NumRets, "Number of return value forwarding "
-                   "retain+autoreleases eliminated");
-STATISTIC(NumRRs, "Number of retain+release paths eliminated");
-STATISTIC(NumPeeps, "Number of calls peephole-optimized");
-
-namespace {
-  /// \brief This is similar to BasicAliasAnalysis, and it uses many of the
-  /// same techniques, except it uses special ObjC-specific reasoning about
-  /// pointer relationships.
-  ///
-  /// In this context ``Provenance'' is defined as the history of an object's
-  /// ownership. Thus ``Provenance Analysis'' is defined by using the notion of
-  /// an ``independent provenance source'' of a pointer to determine whether or
-  /// not two pointers have the same provenance source and thus could
-  /// potentially be related.
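[Editor's note: the class that follows caches a symmetric relation keyed on a canonically ordered pointer pair, and its related() method (later in this hunk) pre-inserts a conservative "true" to cut off recursive queries through PHIs and Selects. A self-contained sketch of that caching discipline, with a placeholder compute():]

  #include <map>
  #include <utility>

  struct RelationCache {
    typedef std::pair<const void *, const void *> Key;
    std::map<Key, bool> Cached;

    bool related(const void *A, const void *B) {
      if (A > B) std::swap(A, B);  // one canonical entry per unordered pair
      std::pair<std::map<Key, bool>::iterator, bool> P =
        Cached.insert(std::make_pair(Key(A, B), true));  // conservative seed
      if (!P.second)
        return P.first->second;    // cache hit, or an in-flight recursion
      bool R = compute(A, B);      // placeholder for the real check
      P.first->second = R;
      return R;
    }
    bool compute(const void *, const void *) { return false; }
  };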
- class ProvenanceAnalysis { - AliasAnalysis *AA; - - typedef std::pair ValuePairTy; - typedef DenseMap CachedResultsTy; - CachedResultsTy CachedResults; - - bool relatedCheck(const Value *A, const Value *B); - bool relatedSelect(const SelectInst *A, const Value *B); - bool relatedPHI(const PHINode *A, const Value *B); - - void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - - public: - ProvenanceAnalysis() {} - - void setAA(AliasAnalysis *aa) { AA = aa; } - - AliasAnalysis *getAA() const { return AA; } - - bool related(const Value *A, const Value *B); - - void clear() { - CachedResults.clear(); - } - }; -} - -bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { - // If the values are Selects with the same condition, we can do a more precise - // check: just check for relations between the values on corresponding arms. - if (const SelectInst *SB = dyn_cast(B)) - if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); - - // Check both arms of the Select node individually. - return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); -} - -bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { - // If the values are PHIs in the same block, we can do a more precise as well - // as efficient check: just check for relations between the values on - // corresponding edges. - if (const PHINode *PNB = dyn_cast(B)) - if (PNB->getParent() == A->getParent()) { - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) - if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) - return true; - return false; - } - - // Check each unique source of the PHI node against B. - SmallPtrSet UniqueSrc; - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { - const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1) && related(PV1, B)) - return true; - } - - // All of the arms checked out. - return false; -} - -/// Test if the value of P, or any value covered by its provenance, is ever -/// stored within the function (not counting callees). -static bool isStoredObjCPointer(const Value *P) { - SmallPtrSet Visited; - SmallVector Worklist; - Worklist.push_back(P); - Visited.insert(P); - do { - P = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end(); - UI != UE; ++UI) { - const User *Ur = *UI; - if (isa(Ur)) { - if (UI.getOperandNo() == 0) - // The pointer is stored. - return true; - // The pointed is stored through. - continue; - } - if (isa(Ur)) - // The pointer is passed as an argument, ignore this. - continue; - if (isa(P)) - // Assume the worst. - return true; - if (Visited.insert(Ur)) - Worklist.push_back(Ur); - } - } while (!Worklist.empty()); - - // Everything checked out. - return false; -} - -bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { - // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); - - // Quick check. - if (A == B) - return true; - - // Ask regular AliasAnalysis, for a first approximation. 
-  switch (AA->alias(A, B)) {
-  case AliasAnalysis::NoAlias:
-    return false;
-  case AliasAnalysis::MustAlias:
-  case AliasAnalysis::PartialAlias:
-    return true;
-  case AliasAnalysis::MayAlias:
-    break;
-  }
-
-  bool AIsIdentified = IsObjCIdentifiedObject(A);
-  bool BIsIdentified = IsObjCIdentifiedObject(B);
-
-  // An ObjC-Identified object can't alias a load if it is never locally
-  // stored.
-  if (AIsIdentified) {
-    // Check for an obvious escape.
-    if (isa<LoadInst>(B))
-      return isStoredObjCPointer(A);
-    if (BIsIdentified) {
-      // Check for an obvious escape.
-      if (isa<LoadInst>(A))
-        return isStoredObjCPointer(B);
-      // Both pointers are identified and escapes aren't an evident problem.
-      return false;
-    }
-  } else if (BIsIdentified) {
-    // Check for an obvious escape.
-    if (isa<LoadInst>(A))
-      return isStoredObjCPointer(B);
-  }
-
-  // Special handling for PHI and Select.
-  if (const PHINode *PN = dyn_cast<PHINode>(A))
-    return relatedPHI(PN, B);
-  if (const PHINode *PN = dyn_cast<PHINode>(B))
-    return relatedPHI(PN, A);
-  if (const SelectInst *S = dyn_cast<SelectInst>(A))
-    return relatedSelect(S, B);
-  if (const SelectInst *S = dyn_cast<SelectInst>(B))
-    return relatedSelect(S, A);
-
-  // Conservative.
-  return true;
-}
-
-bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
-  // Begin by inserting a conservative value into the map. If the insertion
-  // fails, we have the answer already. If it succeeds, leave it there until we
-  // compute the real answer to guard against recursive queries.
-  if (A > B) std::swap(A, B);
-  std::pair<CachedResultsTy::iterator, bool> Pair =
-    CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
-  if (!Pair.second)
-    return Pair.first->second;
-
-  bool Result = relatedCheck(A, B);
-  CachedResults[ValuePairTy(A, B)] = Result;
-  return Result;
-}
-
-namespace {
-  /// \enum Sequence
-  ///
-  /// \brief A sequence of states that a pointer may go through in which an
-  /// objc_retain and objc_release are actually needed.
-  enum Sequence {
-    S_None,
-    S_Retain,         ///< objc_retain(x)
-    S_CanRelease,     ///< foo(x) -- x could possibly see a ref count decrement
-    S_Use,            ///< any use of x
-    S_Stop,           ///< like S_Release, but code motion is stopped
-    S_Release,        ///< objc_release(x)
-    S_MovableRelease  ///< objc_release(x), !clang.imprecise_release
-  };
-}
-
-static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
-  // The easy cases.
-  if (A == B)
-    return A;
-  if (A == S_None || B == S_None)
-    return S_None;
-
-  if (A > B) std::swap(A, B);
-  if (TopDown) {
-    // Choose the side which is further along in the sequence.
-    if ((A == S_Retain || A == S_CanRelease) &&
-        (B == S_CanRelease || B == S_Use))
-      return B;
-  } else {
-    // Choose the side which is further along in the sequence.
-    if ((A == S_Use || A == S_CanRelease) &&
-        (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
-      return A;
-    // If both sides are releases, choose the more conservative one.
-    if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
-      return A;
-    if (A == S_Release && B == S_MovableRelease)
-      return A;
-  }
-
-  return S_None;
-}
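[Editor's note: MergeSeqs above acts as the meet operator of a small lattice over Sequence; with the declaration order S_None < S_Retain < ... < S_MovableRelease shown above, the std::swap canonicalization keeps it symmetric in its operands. A few concrete cases, written as asserts against the definitions above:]

  #include <cassert>
  void mergeSeqsExamples() {
    // Bottom-up (TopDown == false): S_Use is further along the reverse walk.
    assert(MergeSeqs(S_Use, S_Release, false) == S_Use);
    // Two releases merge to the more conservative (code-motion-stopping) one.
    assert(MergeSeqs(S_Stop, S_MovableRelease, false) == S_Stop);
    assert(MergeSeqs(S_Release, S_MovableRelease, false) == S_Release);
    // Top-down: the side further along in the sequence wins.
    assert(MergeSeqs(S_Retain, S_Use, true) == S_Use);
    // Incompatible states collapse to S_None.
    assert(MergeSeqs(S_Retain, S_Release, false) == S_None);
  }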
-namespace {
-  /// \brief Unidirectional information about either a
-  /// retain-decrement-use-release sequence or release-use-decrement-retain
-  /// reverse sequence.
-  struct RRInfo {
-    /// After an objc_retain, the reference count of the referenced
-    /// object is known to be positive. Similarly, before an objc_release, the
-    /// reference count of the referenced object is known to be positive. If
-    /// there are retain-release pairs in code regions where the retain count
-    /// is known to be positive, they can be eliminated, regardless of any side
-    /// effects between them.
-    ///
-    /// Also, a retain+release pair nested within another retain+release
-    /// pair on the same known pointer value can be eliminated, regardless
-    /// of any intervening side effects.
-    ///
-    /// KnownSafe is true when either of these conditions is satisfied.
-    bool KnownSafe;
-
-    /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
-    /// calls).
-    bool IsRetainBlock;
-
-    /// True if the objc_release calls are all marked with the "tail" keyword.
-    bool IsTailCallRelease;
-
-    /// If the Calls are objc_release calls and they all have a
-    /// clang.imprecise_release tag, this is the metadata tag.
-    MDNode *ReleaseMetadata;
-
-    /// For a top-down sequence, the set of objc_retains or
-    /// objc_retainBlocks. For bottom-up, the set of objc_releases.
-    SmallPtrSet<Instruction *, 2> Calls;
-
-    /// The set of optimal insert positions for moving calls in the opposite
-    /// sequence.
-    SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
-    RRInfo() :
-      KnownSafe(false), IsRetainBlock(false),
-      IsTailCallRelease(false),
-      ReleaseMetadata(0) {}
-
-    void clear();
-  };
-}
-
-void RRInfo::clear() {
-  KnownSafe = false;
-  IsRetainBlock = false;
-  IsTailCallRelease = false;
-  ReleaseMetadata = 0;
-  Calls.clear();
-  ReverseInsertPts.clear();
-}
-
-namespace {
-  /// \brief This class summarizes several per-pointer runtime properties which
-  /// are propagated through the flow graph.
-  class PtrState {
-    /// True if the reference count is known to be incremented.
-    bool KnownPositiveRefCount;
-
-    /// True if we've seen an opportunity for partial RR elimination, such as
-    /// pushing calls into a CFG triangle or into one side of a CFG diamond.
-    bool Partial;
-
-    /// The current position in the sequence.
-    Sequence Seq : 8;
-
-  public:
-    /// Unidirectional information about the current sequence.
-    ///
-    /// TODO: Encapsulate this better.
-    RRInfo RRI;
-
-    PtrState() : KnownPositiveRefCount(false), Partial(false),
-                 Seq(S_None) {}
-
-    void SetKnownPositiveRefCount() {
-      KnownPositiveRefCount = true;
-    }
-
-    void ClearRefCount() {
-      KnownPositiveRefCount = false;
-    }
-
-    bool IsKnownIncremented() const {
-      return KnownPositiveRefCount;
-    }
-
-    void SetSeq(Sequence NewSeq) {
-      Seq = NewSeq;
-    }
-
-    Sequence GetSeq() const {
-      return Seq;
-    }
-
-    void ClearSequenceProgress() {
-      ResetSequenceProgress(S_None);
-    }
-
-    void ResetSequenceProgress(Sequence NewSeq) {
-      Seq = NewSeq;
-      Partial = false;
-      RRI.clear();
-    }
-
-    void Merge(const PtrState &Other, bool TopDown);
-  };
-}
-
-void
-PtrState::Merge(const PtrState &Other, bool TopDown) {
-  Seq = MergeSeqs(Seq, Other.Seq, TopDown);
-  KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
-
-  // We can't merge a plain objc_retain with an objc_retainBlock.
-  if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
-    Seq = S_None;
-
-  // If we're not in a sequence (anymore), drop all associated state.
-  if (Seq == S_None) {
-    Partial = false;
-    RRI.clear();
-  } else if (Partial || Other.Partial) {
-    // If we're doing a merge on a path that's previously seen a partial
-    // merge, conservatively drop the sequence, to avoid doing partial
-    // RR elimination. If the branch predicates for the two merges differ,
-    // mixing them is unsafe.
-    ClearSequenceProgress();
-  } else {
-    // Conservatively merge the ReleaseMetadata information.
- if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) - RRI.ReleaseMetadata = 0; - - RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; - RRI.IsTailCallRelease = RRI.IsTailCallRelease && - Other.RRI.IsTailCallRelease; - RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); - - // Merge the insert point sets. If there are any differences, - // that makes this a partial merge. - Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); - for (SmallPtrSet::const_iterator - I = Other.RRI.ReverseInsertPts.begin(), - E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) - Partial |= RRI.ReverseInsertPts.insert(*I); - } -} - -namespace { - /// \brief Per-BasicBlock state. - class BBState { - /// The number of unique control paths from the entry which can reach this - /// block. - unsigned TopDownPathCount; - - /// The number of unique control paths to exits from this block. - unsigned BottomUpPathCount; - - /// A type for PerPtrTopDown and PerPtrBottomUp. - typedef MapVector MapTy; - - /// The top-down traversal uses this to record information known about a - /// pointer at the bottom of each block. - MapTy PerPtrTopDown; - - /// The bottom-up traversal uses this to record information known about a - /// pointer at the top of each block. - MapTy PerPtrBottomUp; - - /// Effective predecessors of the current block ignoring ignorable edges and - /// ignored backedges. - SmallVector Preds; - /// Effective successors of the current block ignoring ignorable edges and - /// ignored backedges. - SmallVector Succs; - - public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} - - typedef MapTy::iterator ptr_iterator; - typedef MapTy::const_iterator ptr_const_iterator; - - ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } - ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } - ptr_const_iterator top_down_ptr_begin() const { - return PerPtrTopDown.begin(); - } - ptr_const_iterator top_down_ptr_end() const { - return PerPtrTopDown.end(); - } - - ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } - ptr_const_iterator bottom_up_ptr_begin() const { - return PerPtrBottomUp.begin(); - } - ptr_const_iterator bottom_up_ptr_end() const { - return PerPtrBottomUp.end(); - } - - /// Mark this block as being an entry block, which has one path from the - /// entry by definition. - void SetAsEntry() { TopDownPathCount = 1; } - - /// Mark this block as being an exit block, which has one path to an exit by - /// definition. - void SetAsExit() { BottomUpPathCount = 1; } - - PtrState &getPtrTopDownState(const Value *Arg) { - return PerPtrTopDown[Arg]; - } - - PtrState &getPtrBottomUpState(const Value *Arg) { - return PerPtrBottomUp[Arg]; - } - - void clearBottomUpPointers() { - PerPtrBottomUp.clear(); - } - - void clearTopDownPointers() { - PerPtrTopDown.clear(); - } - - void InitFromPred(const BBState &Other); - void InitFromSucc(const BBState &Other); - void MergePred(const BBState &Other); - void MergeSucc(const BBState &Other); - - /// Return the number of possible unique paths from an entry to an exit - /// which pass through this block. This is only valid after both the - /// top-down and bottom-up traversals are complete. - unsigned GetAllPathCount() const { - assert(TopDownPathCount != 0); - assert(BottomUpPathCount != 0); - return TopDownPathCount * BottomUpPathCount; - } - - // Specialized CFG utilities. 
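[Editor's note: the path counts above combine multiplicatively in GetAllPathCount because every entry-to-block path pairs with every block-to-exit path. The merge routines just below add predecessor/successor counts and fall back to a conservative state on unsigned wrap; that idiom in isolation:]

  // Sketch of the overflow check used by MergePred/MergeSucc below: after
  // Count += Other, an unsigned sum smaller than either operand means wrap.
  static bool addPathCounts(unsigned &Count, unsigned Other) {
    Count += Other;
    return Count >= Other;   // false => overflow; caller clears its state
  }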
- typedef SmallVectorImpl::const_iterator edge_iterator; - edge_iterator pred_begin() { return Preds.begin(); } - edge_iterator pred_end() { return Preds.end(); } - edge_iterator succ_begin() { return Succs.begin(); } - edge_iterator succ_end() { return Succs.end(); } - - void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } - void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } - - bool isExit() const { return Succs.empty(); } - }; -} - -void BBState::InitFromPred(const BBState &Other) { - PerPtrTopDown = Other.PerPtrTopDown; - TopDownPathCount = Other.TopDownPathCount; -} - -void BBState::InitFromSucc(const BBState &Other) { - PerPtrBottomUp = Other.PerPtrBottomUp; - BottomUpPathCount = Other.BottomUpPathCount; -} - -/// The top-down traversal uses this to merge information about predecessors to -/// form the initial state for a new block. -void BBState::MergePred(const BBState &Other) { - // Other.TopDownPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - TopDownPathCount += Other.TopDownPathCount; - - // Check for overflow. If we have overflow, fall back to conservative - // behavior. - if (TopDownPathCount < Other.TopDownPathCount) { - clearTopDownPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the same key, - // merge the entries. Otherwise, copy the entry and merge it with an empty - // entry. - for (ptr_const_iterator MI = Other.top_down_ptr_begin(), - ME = Other.top_down_ptr_end(); MI != ME; ++MI) { - std::pair Pair = PerPtrTopDown.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, - /*TopDown=*/true); - } - - // For each entry in our set, if the other set doesn't have an entry with the - // same key, force it to merge with an empty entry. - for (ptr_iterator MI = top_down_ptr_begin(), - ME = top_down_ptr_end(); MI != ME; ++MI) - if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) - MI->second.Merge(PtrState(), /*TopDown=*/true); -} - -/// The bottom-up traversal uses this to merge information about successors to -/// form the initial state for a new block. -void BBState::MergeSucc(const BBState &Other) { - // Other.BottomUpPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - BottomUpPathCount += Other.BottomUpPathCount; - - // Check for overflow. If we have overflow, fall back to conservative - // behavior. - if (BottomUpPathCount < Other.BottomUpPathCount) { - clearBottomUpPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the - // same key, merge the entries. Otherwise, copy the entry and merge - // it with an empty entry. - for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), - ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { - std::pair Pair = PerPtrBottomUp.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, - /*TopDown=*/false); - } - - // For each entry in our set, if the other set doesn't have an entry - // with the same key, force it to merge with an empty entry. - for (ptr_iterator MI = bottom_up_ptr_begin(), - ME = bottom_up_ptr_end(); MI != ME; ++MI) - if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) - MI->second.Merge(PtrState(), /*TopDown=*/false); -} - -namespace { - /// \brief The main ARC optimization pass. - class ObjCARCOpt : public FunctionPass { - bool Changed; - ProvenanceAnalysis PA; - - /// A flag indicating whether this optimization pass should run. 
-    bool Run;
-
-    /// Declarations for ObjC runtime functions, for use in creating calls to
-    /// them. These are initialized lazily to avoid cluttering up the Module
-    /// with unused declarations.
-
-    /// Declaration for ObjC runtime function
-    /// objc_retainAutoreleasedReturnValue.
-    Constant *RetainRVCallee;
-    /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
-    Constant *AutoreleaseRVCallee;
-    /// Declaration for ObjC runtime function objc_release.
-    Constant *ReleaseCallee;
-    /// Declaration for ObjC runtime function objc_retain.
-    Constant *RetainCallee;
-    /// Declaration for ObjC runtime function objc_retainBlock.
-    Constant *RetainBlockCallee;
-    /// Declaration for ObjC runtime function objc_autorelease.
-    Constant *AutoreleaseCallee;
-
-    /// Flags which determine whether each of the interesting runtime functions
-    /// is in fact used in the current function.
-    unsigned UsedInThisFunction;
-
-    /// The Metadata Kind for clang.imprecise_release metadata.
-    unsigned ImpreciseReleaseMDKind;
-
-    /// The Metadata Kind for clang.arc.copy_on_escape metadata.
-    unsigned CopyOnEscapeMDKind;
-
-    /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
-    unsigned NoObjCARCExceptionsMDKind;
-
-    Constant *getRetainRVCallee(Module *M);
-    Constant *getAutoreleaseRVCallee(Module *M);
-    Constant *getReleaseCallee(Module *M);
-    Constant *getRetainCallee(Module *M);
-    Constant *getRetainBlockCallee(Module *M);
-    Constant *getAutoreleaseCallee(Module *M);
-
-    bool IsRetainBlockOptimizable(const Instruction *Inst);
-
-    void OptimizeRetainCall(Function &F, Instruction *Retain);
-    bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
-    void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
-                                   InstructionClass &Class);
-    void OptimizeIndividualCalls(Function &F);
-
-    void CheckForCFGHazards(const BasicBlock *BB,
-                            DenseMap<const BasicBlock *, BBState> &BBStates,
-                            BBState &MyStates) const;
-    bool VisitInstructionBottomUp(Instruction *Inst,
-                                  BasicBlock *BB,
-                                  MapVector<Value *, RRInfo> &Retains,
-                                  BBState &MyStates);
-    bool VisitBottomUp(BasicBlock *BB,
-                       DenseMap<const BasicBlock *, BBState> &BBStates,
-                       MapVector<Value *, RRInfo> &Retains);
-    bool VisitInstructionTopDown(Instruction *Inst,
-                                 DenseMap<Value *, RRInfo> &Releases,
-                                 BBState &MyStates);
-    bool VisitTopDown(BasicBlock *BB,
-                      DenseMap<const BasicBlock *, BBState> &BBStates,
-                      DenseMap<Value *, RRInfo> &Releases);
-    bool Visit(Function &F,
-               DenseMap<const BasicBlock *, BBState> &BBStates,
-               MapVector<Value *, RRInfo> &Retains,
-               DenseMap<Value *, RRInfo> &Releases);
-
-    void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
-                   MapVector<Value *, RRInfo> &Retains,
-                   DenseMap<Value *, RRInfo> &Releases,
-                   SmallVectorImpl<Instruction *> &DeadInsts,
-                   Module *M);
-
-    bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
-                               MapVector<Value *, RRInfo> &Retains,
-                               DenseMap<Value *, RRInfo> &Releases,
-                               Module *M,
-                               SmallVector<Instruction *, 4> &NewRetains,
-                               SmallVector<Instruction *, 4> &NewReleases,
-                               SmallVector<Instruction *, 8> &DeadInsts,
-                               RRInfo &RetainsToMove,
-                               RRInfo &ReleasesToMove,
-                               Value *Arg,
-                               bool KnownSafe,
-                               bool &AnyPairsCompletelyEliminated);
-
-    bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
-                              MapVector<Value *, RRInfo> &Retains,
-                              DenseMap<Value *, RRInfo> &Releases,
-                              Module *M);
-
-    void OptimizeWeakCalls(Function &F);
-
-    bool OptimizeSequences(Function &F);
-
-    void OptimizeReturns(Function &F);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual bool doInitialization(Module &M);
-    virtual bool runOnFunction(Function &F);
-    virtual void releaseMemory();
-
-  public:
-    static char ID;
-    ObjCARCOpt() : FunctionPass(ID) {
-      initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
-    }
-  };
-}
-
-char ObjCARCOpt::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCOpt,
-                      "objc-arc", "ObjC ARC optimization", false, false)
-INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) -INITIALIZE_PASS_END(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) - -Pass *llvm::createObjCARCOptPass() { - return new ObjCARCOpt(); -} - -void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - // ARC optimization doesn't currently split critical edges. - AU.setPreservesCFG(); -} - -bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { - // Without the magic metadata tag, we have to assume this might be an - // objc_retainBlock call inserted to convert a block pointer to an id, - // in which case it really is needed. - if (!Inst->getMetadata(CopyOnEscapeMDKind)) - return false; - - // If the pointer "escapes" (not including being used in a call), - // the copy may be needed. - if (DoesObjCBlockEscape(Inst)) - return false; - - // Otherwise, it's not needed. - return true; -} - -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attribute); - } - return RetainRVCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { - if (!AutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - AutoreleaseRVCallee = - M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, - Attribute); - } - return AutoreleaseRVCallee; -} - -Constant *ObjCARCOpt::getReleaseCallee(Module *M) { - if (!ReleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - ReleaseCallee = - M->getOrInsertFunction( - "objc_release", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attribute); - } - return ReleaseCallee; -} - -Constant *ObjCARCOpt::getRetainCallee(Module *M) { - if (!RetainCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainCallee = - M->getOrInsertFunction( - "objc_retain", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attribute); - } - return RetainCallee; -} - -Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { - if (!RetainBlockCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. 
- RetainBlockCallee = - M->getOrInsertFunction( - "objc_retainBlock", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - AttributeSet()); - } - return RetainBlockCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { - if (!AutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - AutoreleaseCallee = - M->getOrInsertFunction( - "objc_autorelease", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attribute); - } - return AutoreleaseCallee; -} - -/// Test whether the given value is possible a reference-counted pointer, -/// including tests which utilize AliasAnalysis. -static bool IsPotentialRetainableObjPtr(const Value *Op, AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialRetainableObjPtr(Op)) - return false; - - // Objects in constant memory are not reference-counted. - if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// Test whether the given instruction can result in a reference count -/// modification (positive or negative) for the pointer's object. -static bool -CanAlterRefCount(const Instruction *Inst, const Value *Ptr, - ProvenanceAnalysis &PA, InstructionClass Class) { - switch (Class) { - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_User: - // These operations never directly modify a reference count. - return false; - default: break; - } - - ImmutableCallSite CS = static_cast(Inst); - assert(CS && "Only calls can alter reference counts!"); - - // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); - if (AliasAnalysis::onlyReadsMemory(MRB)) - return false; - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) { - const Value *Op = *I; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } - - // Assume the worst. - return true; -} - -/// Test whether the given instruction can "use" the given pointer's object in a -/// way that requires the reference count to be positive. -static bool -CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, - InstructionClass Class) { - // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. - if (Class == IC_Call) - return false; - - // Consider various instructions which may have pointer arguments which are - // not "uses". - if (const ICmpInst *ICI = dyn_cast(Inst)) { - // Comparing a pointer with null, or any other constant, isn't really a use, - // because we don't care what the pointer points to, or about the values - // of any other dynamic reference-counted pointers. - if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) - return false; - } else if (ImmutableCallSite CS = static_cast(Inst)) { - // For calls, just check the arguments (and not the callee operand). 
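[Editor's note: a worked example of the argument-pointee filter in CanAlterRefCount above, written as comments; the IR is illustrative and the intrinsic signature is abbreviated:]

  // Worked example:
  //   %x = call i8* @objc_retain(i8* %p)
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, ...)
  //   call void @objc_release(i8* %p)
  // memcpy onlyAccessesArgPointees; if PA.related(%p, %dst) and
  // PA.related(%p, %src) are both false, CanAlterRefCount returns false for
  // the call, so the retain/release pair may move (or be removed) across it.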
- for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), - OE = CS.arg_end(); OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } else if (const StoreInst *SI = dyn_cast(Inst)) { - // Special-case stores, because we don't care about the stored value, just - // the store address. - const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); - // If we can't tell what the underlying object was, assume there is a - // dependence. - return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); - } - - // Check each operand for a match. - for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); - OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; -} - -/// Test whether the given instruction can autorelease any pointer or cause an -/// autoreleasepool pop. -static bool -CanInterruptRV(InstructionClass Class) { - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_CallOrUser: - case IC_Call: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - return true; - default: - return false; - } -} - -namespace { - /// \enum DependenceKind - /// \brief Defines different dependence kinds among various ARC constructs. - /// - /// There are several kinds of dependence-like concepts in use here. - /// - enum DependenceKind { - NeedsPositiveRetainCount, - AutoreleasePoolBoundary, - CanChangeRetainCount, - RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. - RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. - RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. - }; -} - -/// Test if there can be dependencies on Inst through Arg. This function only -/// tests dependencies relevant for removing pairs of calls. -static bool -Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, - ProvenanceAnalysis &PA) { - // If we've reached the definition of Arg, stop. - if (Inst == Arg) - return true; - - switch (Flavor) { - case NeedsPositiveRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanUse(Inst, Arg, PA, Class); - } - } - - case AutoreleasePoolBoundary: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // These mark the end and begin of an autorelease pool scope. - return true; - default: - // Nothing else does this. - return false; - } - } - - case CanChangeRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - // Conservatively assume this can decrement any count. - return true; - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanAlterRefCount(Inst, Arg, PA, Class); - } - } - - case RetainAutoreleaseDep: - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // Don't merge an objc_autorelease with an objc_retain inside a different - // autoreleasepool scope. - return true; - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Nothing else matters for objc_retainAutorelease formation. 
-      return false;
-    }
-
-  case RetainAutoreleaseRVDep: {
-    InstructionClass Class = GetBasicInstructionClass(Inst);
-    switch (Class) {
-    case IC_Retain:
-    case IC_RetainRV:
-      // Check for a retain of the same pointer for merging.
-      return GetObjCArg(Inst) == Arg;
-    default:
-      // Anything that can autorelease interrupts
-      // retainAutoreleaseReturnValue formation.
-      return CanInterruptRV(Class);
-    }
-  }
-
-  case RetainRVDep:
-    return CanInterruptRV(GetBasicInstructionClass(Inst));
-  }
-
-  llvm_unreachable("Invalid dependence flavor");
-}
-
-/// Walk up the CFG from StartPos (which is in StartBB) and find local and
-/// non-local dependencies on Arg.
-///
-/// TODO: Cache results?
-static void
-FindDependencies(DependenceKind Flavor,
-                 const Value *Arg,
-                 BasicBlock *StartBB, Instruction *StartInst,
-                 SmallPtrSet<Instruction *, 4> &DependingInstructions,
-                 SmallPtrSet<const BasicBlock *, 4> &Visited,
-                 ProvenanceAnalysis &PA) {
-  BasicBlock::iterator StartPos = StartInst;
-
-  SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
-  Worklist.push_back(std::make_pair(StartBB, StartPos));
-  do {
-    std::pair<BasicBlock *, BasicBlock::iterator> Pair =
-      Worklist.pop_back_val();
-    BasicBlock *LocalStartBB = Pair.first;
-    BasicBlock::iterator LocalStartPos = Pair.second;
-    BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
-    for (;;) {
-      if (LocalStartPos == StartBBBegin) {
-        pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
-        if (PI == PE)
-          // If we've reached the function entry, produce a null dependence.
-          DependingInstructions.insert(0);
-        else
-          // Add the predecessors to the worklist.
-          do {
-            BasicBlock *PredBB = *PI;
-            if (Visited.insert(PredBB))
-              Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
-          } while (++PI != PE);
-        break;
-      }
-
-      Instruction *Inst = --LocalStartPos;
-      if (Depends(Flavor, Inst, Arg, PA)) {
-        DependingInstructions.insert(Inst);
-        break;
-      }
-    }
-  } while (!Worklist.empty());
-
-  // Determine whether the original StartBB post-dominates all of the blocks we
-  // visited. If not, insert a sentinel indicating that most optimizations are
-  // not safe.
-  for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
-       E = Visited.end(); I != E; ++I) {
-    const BasicBlock *BB = *I;
-    if (BB == StartBB)
-      continue;
-    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
-    for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
-      const BasicBlock *Succ = *SI;
-      if (Succ != StartBB && !Visited.count(Succ)) {
-        DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
-        return;
-      }
-    }
-  }
-}
-
-static bool isNullOrUndef(const Value *V) {
-  return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
-}
-
-static bool isNoopInstruction(const Instruction *I) {
-  return isa<BitCastInst>(I) ||
-         (isa<GetElementPtrInst>(I) &&
-          cast<GetElementPtrInst>(I)->hasAllZeroIndices());
-}
-
-/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is
-/// a return value.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
-  ImmutableCallSite CS(GetObjCArg(Retain));
-  const Instruction *Call = CS.getInstruction();
-  if (!Call) return;
-  if (Call->getParent() != Retain->getParent()) return;
-
-  // Check that the call is next to the retain.
-  BasicBlock::const_iterator I = Call;
-  ++I;
-  while (isNoopInstruction(I)) ++I;
-  if (&*I != Retain)
-    return;
-
-  // Turn it into an objc_retainAutoreleasedReturnValue.
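[Editor's note: a typical query against FindDependencies defined above, mirroring how the individual-call optimization later in this hunk uses it; Arg, Inst and PA are as in that caller:]

  SmallPtrSet<Instruction *, 4> DependingInstructions;
  SmallPtrSet<const BasicBlock *, 4> Visited;
  FindDependencies(NeedsPositiveRetainCount, Arg,
                   Inst->getParent(), Inst,
                   DependingInstructions, Visited, PA);
  // A null entry means the reverse walk reached the function entry; the
  // (Instruction *)-1 sentinel means StartBB does not post-dominate the
  // visited blocks, so only local conclusions are safe.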
- Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming " - "objc_retain => objc_retainAutoreleasedReturnValue" - " since the operand is a return value.\n" - " Old: " - << *Retain << "\n"); - - cast(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *Retain << "\n"); -} - -/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is -/// not a return value. Or, if it can be paired with an -/// objc_autoreleaseReturnValue, delete the pair and return true. -bool -ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { - // Check for the argument being from an immediately preceding call or invoke. - const Value *Arg = GetObjCArg(RetainRV); - ImmutableCallSite CS(Arg); - if (const Instruction *Call = CS.getInstruction()) { - if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } else if (const InvokeInst *II = dyn_cast(Call)) { - BasicBlock *RetainRVParent = RetainRV->getParent(); - if (II->getNormalDest() == RetainRVParent) { - BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } - } - } - - // Check for being preceded by an objc_autoreleaseReturnValue on the same - // pointer. In this case, we can delete the pair. - BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); - if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); - if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && - GetObjCArg(I) == Arg) { - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n" - << " Erasing " << *RetainRV - << "\n"); - - EraseInstruction(I); - EraseInstruction(RetainRV); - return true; - } - } - - // Turn it to a plain objc_retain. - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming " - "objc_retainAutoreleasedReturnValue => " - "objc_retain since the operand is not a return value.\n" - " Old: " - << *RetainRV << "\n"); - - cast(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *RetainRV << "\n"); - - return false; -} - -/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not -/// used as a return value. -void -ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, - InstructionClass &Class) { - // Check for a return of the pointer value. - const Value *Ptr = GetObjCArg(AutoreleaseRV); - SmallVector Users; - Users.push_back(Ptr); - do { - Ptr = Users.pop_back_val(); - for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); - UI != UE; ++UI) { - const User *I = *UI; - if (isa(I) || GetBasicInstructionClass(I) == IC_RetainRV) - return; - if (isa(I)) - Users.push_back(I); - } - } while (!Users.empty()); - - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming " - "objc_autoreleaseReturnValue => " - "objc_autorelease since its operand is not used as a return " - "value.\n" - " Old: " - << *AutoreleaseRV << "\n"); - - CallInst *AutoreleaseRVCI = cast(AutoreleaseRV); - AutoreleaseRVCI-> - setCalledFunction(getAutoreleaseCallee(F.getParent())); - AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. 
- Class = IC_Autorelease; - - DEBUG(dbgs() << " New: " - << *AutoreleaseRV << "\n"); - -} - -/// Visit each call, one at a time, and make simplifications without doing any -/// additional analysis. -void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { - // Reset all the flags in preparation for recomputing them. - UsedInThisFunction = 0; - - // Visit all objc_* calls in F. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - InstructionClass Class = GetBasicInstructionClass(Inst); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: " - << Class << "; " << *Inst << "\n"); - - switch (Class) { - default: break; - - // Delete no-op casts. These function calls have special semantics, but - // the semantics are entirely implemented via lowering in the front-end, - // so by the time they reach the optimizer, they are just no-op calls - // which return their argument. - // - // There are gray areas here, as the ability to cast reference-counted - // pointers to raw void* and back allows code to break ARC assumptions, - // however these are currently considered to be unimportant. - case IC_NoopCast: - Changed = true; - ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:" - " " << *Inst << "\n"); - EraseInstruction(Inst); - continue; - - // If the pointer-to-weak-pointer is null, it's undefined behavior. - case IC_StoreWeak: - case IC_LoadWeak: - case IC_LoadWeakRetained: - case IC_InitWeak: - case IC_DestroyWeak: { - CallInst *CI = cast(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); - CI->replaceAllUsesWith(NewValue); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_CopyWeak: - case IC_MoveWeak: { - CallInst *CI = cast(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - - llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); - - CI->replaceAllUsesWith(NewValue); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_Retain: - OptimizeRetainCall(F, Inst); - break; - case IC_RetainRV: - if (OptimizeRetainRVCall(F, Inst)) - continue; - break; - case IC_AutoreleaseRV: - OptimizeAutoreleaseRVCall(F, Inst, Class); - break; - } - - // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. - if (IsAutorelease(Class) && Inst->use_empty()) { - CallInst *Call = cast(Inst); - const Value *Arg = Call->getArgOperand(0); - Arg = FindSingleUseIdentifiedObject(Arg); - if (Arg) { - Changed = true; - ++NumAutoreleases; - - // Create the declaration lazily. 
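[Editor's note: the "create the declaration lazily" comment above is the same pattern as the getXCallee() helpers earlier in this hunk; a hedged generic sketch of that shape, not code from this patch:]

  // Create (or reuse) a runtime-function declaration on first use and cache
  // it, so unused declarations never clutter the Module.
  static Constant *getCachedCallee(Module *M, Constant *&Cache, StringRef Name,
                                   FunctionType *FTy, AttributeSet Attrs) {
    if (!Cache)
      Cache = M->getOrInsertFunction(Name, FTy, Attrs);
    return Cache;
  }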
- LLVMContext &C = Inst->getContext(); - CallInst *NewCall = - CallInst::Create(getReleaseCallee(F.getParent()), - Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef())); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " - "objc_autorelease(x) with objc_release(x) since x is " - "otherwise unused.\n" - " Old: " << *Call << - "\n New: " << - *NewCall << "\n"); - - EraseInstruction(Call); - Inst = NewCall; - Class = IC_Release; - } - } - - // For functions which can never be passed stack arguments, add - // a tail keyword. - if (IsAlwaysTail(Class)) { - Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" - " to function since it can never be passed stack args: " << *Inst << - "\n"); - cast(Inst)->setTailCall(); - } - - // Ensure that functions that can never have a "tail" keyword due to the - // semantics of ARC truly do not do so. - if (IsNeverTail(Class)) { - Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " - "keyword from function: " << *Inst << - "\n"); - cast(Inst)->setTailCall(false); - } - - // Set nounwind as needed. - if (IsNoThrow(Class)) { - Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" - " class. Setting nounwind on: " << *Inst << "\n"); - cast(Inst)->setDoesNotThrow(); - } - - if (!IsNoopOnNull(Class)) { - UsedInThisFunction |= 1 << Class; - continue; - } - - const Value *Arg = GetObjCArg(Inst); - - // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { - Changed = true; - ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " - " null are no-ops. Erasing: " << *Inst << "\n"); - EraseInstruction(Inst); - continue; - } - - // Keep track of which of retain, release, autorelease, and retain_block - // are actually present in this function. - UsedInThisFunction |= 1 << Class; - - // If Arg is a PHI, and one or more incoming values to the - // PHI are null, and the call is control-equivalent to the PHI, and there - // are no relevant side effects between the PHI and the call, the call - // could be pushed up to just those paths with non-null incoming values. - // For now, don't bother splitting critical edges for this. - SmallVector, 4> Worklist; - Worklist.push_back(std::make_pair(Inst, Arg)); - do { - std::pair Pair = Worklist.pop_back_val(); - Inst = Pair.first; - Arg = Pair.second; - - const PHINode *PN = dyn_cast(Arg); - if (!PN) continue; - - // Determine if the PHI has any null operands, or any incoming - // critical edges. - bool HasNull = false; - bool HasCriticalEdges = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) - HasNull = true; - else if (cast(PN->getIncomingBlock(i)->back()) - .getNumSuccessors() != 1) { - HasCriticalEdges = true; - break; - } - } - // If we have null operands and no critical edges, optimize. - if (!HasCriticalEdges && HasNull) { - SmallPtrSet DependingInstructions; - SmallPtrSet Visited; - - // Check that there is nothing that cares about the reference - // count between the call and the phi. - switch (Class) { - case IC_Retain: - case IC_RetainBlock: - // These can always be moved up. - break; - case IC_Release: - // These can't be moved across things that care about the retain - // count. 
- FindDependencies(NeedsPositiveRetainCount, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_Autorelease: - // These can't be moved across autorelease pool scope boundaries. - FindDependencies(AutoreleasePoolBoundary, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_RetainRV: - case IC_AutoreleaseRV: - // Don't move these; the RV optimization depends on the autoreleaseRV - // being tail called, and the retainRV being immediately after a call - // (which might still happen if we get lucky with codegen layout, but - // it's not worth taking the chance). - continue; - default: - llvm_unreachable("Invalid dependence flavor"); - } - - if (DependingInstructions.size() == 1 && - *DependingInstructions.begin() == PN) { - Changed = true; - ++NumPartialNoops; - // Clone the call into each predecessor that has a non-null value. - CallInst *CInst = cast(Inst); - Type *ParamTy = CInst->getArgOperand(0)->getType(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { - CallInst *Clone = cast(CInst->clone()); - Value *Op = PN->getIncomingValue(i); - Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); - if (Op->getType() != ParamTy) - Op = new BitCastInst(Op, ParamTy, "", InsertPos); - Clone->setArgOperand(0, Op); - Clone->insertBefore(InsertPos); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " - << *CInst << "\n" - " And inserting " - "clone at " << *InsertPos << "\n"); - Worklist.push_back(std::make_pair(Clone, Incoming)); - } - } - // Erase the original call. - DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); - EraseInstruction(CInst); - continue; - } - } - } while (!Worklist.empty()); - } - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); -} - -/// Check for critical edges, loop boundaries, irreducible control flow, or -/// other CFG structures where moving code across the edge would result in it -/// being executed more. -void -ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, - DenseMap &BBStates, - BBState &MyStates) const { - // If any top-down local-use or possible-dec has a succ which is earlier in - // the sequence, forget it. - for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. 
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None:
- case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_Use:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
- case S_CanRelease: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_CanRelease:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
- }
-}
-
-bool
-ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates) {
- bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
-
- // If we see two releases in a row on the same pointer, make
- // a note, and we'll circle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
- "releases (i.e. a release pair)\n");
- NestingDetected = true;
- }
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.ResetSequenceProgress(ReleaseMetadata ?
S_MovableRelease : S_Release); - S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); - S.RRI.Calls.insert(Inst); - - S.SetKnownPositiveRefCount(); - break; - } - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.SetKnownPositiveRefCount(); - - switch (S.GetSeq()) { - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_CanRelease: - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - Retains[Inst] = S.RRI; - } - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - return NestingDetected; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearBottomUpPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { - case S_Release: - case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - assert(S.RRI.ReverseInsertPts.empty()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - assert(S.RRI.ReverseInsertPts.empty()); - // As above; handle invoke specially. 
- if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
- else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains) {
- bool NestingDetected = false;
- BBState &MyStates = BBStates[BB];
-
- // Merge the states from each successor to compute the initial state
- // for the current block.
- BBState::edge_iterator SI(MyStates.succ_begin()),
- SE(MyStates.succ_end());
- if (SI != SE) {
- const BasicBlock *Succ = *SI;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
- assert(I != BBStates.end());
- MyStates.InitFromSucc(I->second);
- ++SI;
- for (; SI != SE; ++SI) {
- Succ = *SI;
- I = BBStates.find(Succ);
- assert(I != BBStates.end());
- MyStates.MergeSucc(I->second);
- }
- }
-
- // Visit all the instructions, bottom-up.
- for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
- Instruction *Inst = llvm::prior(I);
-
- // Invoke instructions are visited as part of their successors (below).
- if (isa<InvokeInst>(Inst))
- continue;
-
- DEBUG(dbgs() << "ObjCARCOpt::VisitBottomUp: Visiting " << *Inst << "\n");
-
- NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
- }
-
- // If there's a predecessor with an invoke, visit the invoke as if it were
- // part of this block, since we can't insert code after an invoke in its own
- // block, and we don't want to split critical edges.
- for (BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end()); PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
- NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
- DenseMap<Value *, RRInfo> &Releases,
- BBState &MyStates) {
- bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer, make
- // a note, and we'll circle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- S.ResetSequenceProgress(S_Retain);
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownSafe = S.IsKnownIncremented();
- S.RRI.Calls.insert(Inst);
- }
-
- S.SetKnownPositiveRefCount();
-
- // A retain can be a potential use; proceed to the generic checking
- // code below.
- break; - } - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); - - switch (S.GetSeq()) { - case S_Retain: - case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); - Releases[Inst] = S.RRI; - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - break; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearTopDownPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitTopDown(BasicBlock *BB, - DenseMap &BBStates, - DenseMap &Releases) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each predecessor to compute the initial state - // for the current block. - BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); - if (PI != PE) { - const BasicBlock *Pred = *PI; - DenseMap::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.InitFromPred(I->second); - ++PI; - for (; PI != PE; ++PI) { - Pred = *PI; - I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.MergePred(I->second); - } - } - - // Visit all the instructions, top-down. - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *Inst = I; - - DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n"); - - NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); - } - - CheckForCFGHazards(BB, BBStates, MyStates); - return NestingDetected; -} - -static void -ComputePostOrders(Function &F, - SmallVectorImpl &PostOrder, - SmallVectorImpl &ReverseCFGPostOrder, - unsigned NoObjCARCExceptionsMDKind, - DenseMap &BBStates) { - /// The visited set, for doing DFS walks. - SmallPtrSet Visited; - - // Do DFS, computing the PostOrder. 
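- // (The explicit-stack loop below is equivalent to the classic recursive
- // postorder walk, sketched here for reference, minus the pred/succ edge
- // bookkeeping recorded in BBStates:
- //
- // void dfs(BasicBlock *BB) {
- //   Visited.insert(BB);
- //   for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- //     if (!Visited.count(*SI))
- //       dfs(*SI);
- //   PostOrder.push_back(BB);
- // })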
- SmallPtrSet<BasicBlock *, 16> OnStack;
- SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
-
- // Functions always have exactly one entry block, and we don't have
- // any other block that we treat like an entry block.
- BasicBlock *EntryBB = &F.getEntryBlock();
- BBState &MyStates = BBStates[EntryBB];
- MyStates.SetAsEntry();
- TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
- SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
- Visited.insert(EntryBB);
- OnStack.insert(EntryBB);
- do {
- dfs_next_succ:
- BasicBlock *CurrBB = SuccStack.back().first;
- TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
- succ_iterator SE(TI, false);
-
- while (SuccStack.back().second != SE) {
- BasicBlock *SuccBB = *SuccStack.back().second++;
- if (Visited.insert(SuccBB)) {
- TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
- SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
- BBStates[CurrBB].addSucc(SuccBB);
- BBState &SuccStates = BBStates[SuccBB];
- SuccStates.addPred(CurrBB);
- OnStack.insert(SuccBB);
- goto dfs_next_succ;
- }
-
- if (!OnStack.count(SuccBB)) {
- BBStates[CurrBB].addSucc(SuccBB);
- BBStates[SuccBB].addPred(CurrBB);
- }
- }
- OnStack.erase(CurrBB);
- PostOrder.push_back(CurrBB);
- SuccStack.pop_back();
- } while (!SuccStack.empty());
-
- Visited.clear();
-
- // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
- // Functions may have many exits, and there are also blocks which we treat
- // as exits due to ignored edges.
- SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *ExitBB = I;
- BBState &MyStates = BBStates[ExitBB];
- if (!MyStates.isExit())
- continue;
-
- MyStates.SetAsExit();
-
- PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
- Visited.insert(ExitBB);
- while (!PredStack.empty()) {
- reverse_dfs_next_succ:
- BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
- while (PredStack.back().second != PE) {
- BasicBlock *BB = *PredStack.back().second++;
- if (Visited.insert(BB)) {
- PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
- goto reverse_dfs_next_succ;
- }
- }
- ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
- }
- }
-}
-
-// Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
-
- // Use reverse-postorder traversals, because we magically know that loops
- // will be well behaved, i.e. they won't repeatedly call retain on a single
- // pointer without doing a release. We can't use the ReversePostOrderTraversal
- // class here because we want the reverse-CFG postorder to consider each
- // function exit point, and we want to ignore selected cycle edges.
- SmallVector<BasicBlock *, 16> PostOrder;
- SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
- ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
- NoObjCARCExceptionsMDKind,
- BBStates);
-
- // Use reverse-postorder on the reverse CFG for bottom-up.
- bool BottomUpNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
- I != E; ++I)
- BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
-
- // Use reverse-postorder for top-down.
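- // (That is, walk PostOrder from back to front, so each block is visited
- // after all of its predecessors along non-ignored edges.)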
- bool TopDownNestingDetected = false; - for (SmallVectorImpl::const_reverse_iterator I = - PostOrder.rbegin(), E = PostOrder.rend(); - I != E; ++I) - TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); - - return TopDownNestingDetected && BottomUpNestingDetected; -} - -/// Move the calls in RetainsToMove and ReleasesToMove. -void ObjCARCOpt::MoveCalls(Value *Arg, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - MapVector &Retains, - DenseMap &Releases, - SmallVectorImpl &DeadInsts, - Module *M) { - Type *ArgTy = Arg->getType(); - Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); - - // Insert the new retain and release calls. - for (SmallPtrSet::const_iterator - PI = ReleasesToMove.ReverseInsertPts.begin(), - PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? - getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); - Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef())); - else - Call->setTailCall(); - - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); - } - for (SmallPtrSet::const_iterator - PI = RetainsToMove.ReverseInsertPts.begin(), - PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, - "", InsertPt); - // Attach a clang.imprecise_release metadata tag, if appropriate. - if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); - Call->setDoesNotThrow(); - if (ReleasesToMove.IsTailCallRelease) - Call->setTailCall(); - - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); - } - - // Delete the original retain and release calls. - for (SmallPtrSet::const_iterator - AI = RetainsToMove.Calls.begin(), - AE = RetainsToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRetain = *AI; - Retains.blot(OrigRetain); - DeadInsts.push_back(OrigRetain); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain << - "\n"); - } - for (SmallPtrSet::const_iterator - AI = ReleasesToMove.Calls.begin(), - AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRelease = *AI; - Releases.erase(OrigRelease); - DeadInsts.push_back(OrigRelease); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease - << "\n"); - } -} - -bool -ObjCARCOpt::ConnectTDBUTraversals(DenseMap - &BBStates, - MapVector &Retains, - DenseMap &Releases, - Module *M, - SmallVector &NewRetains, - SmallVector &NewReleases, - SmallVector &DeadInsts, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - Value *Arg, - bool KnownSafe, - bool &AnyPairsCompletelyEliminated) { - // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. - bool KnownSafeTD = true, KnownSafeBU = true; - - // Connect the dots between the top-down-collected RetainsToMove and - // bottom-up-collected ReleasesToMove to form sets of related calls. 
- // This is an iterative process so that we connect multiple releases - // to multiple retains if needed. - unsigned OldDelta = 0; - unsigned NewDelta = 0; - unsigned OldCount = 0; - unsigned NewCount = 0; - bool FirstRelease = true; - bool FirstRetain = true; - for (;;) { - for (SmallVectorImpl::const_iterator - NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { - Instruction *NewRetain = *NI; - MapVector::const_iterator It = Retains.find(NewRetain); - assert(It != Retains.end()); - const RRInfo &NewRetainRRI = It->second; - KnownSafeTD &= NewRetainRRI.KnownSafe; - for (SmallPtrSet::const_iterator - LI = NewRetainRRI.Calls.begin(), - LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewRetainRelease = *LI; - DenseMap::const_iterator Jt = - Releases.find(NewRetainRelease); - if (Jt == Releases.end()) - return false; - const RRInfo &NewRetainReleaseRRI = Jt->second; - assert(NewRetainReleaseRRI.Calls.count(NewRetain)); - if (ReleasesToMove.Calls.insert(NewRetainRelease)) { - OldDelta -= - BBStates[NewRetainRelease->getParent()].GetAllPathCount(); - - // Merge the ReleaseMetadata and IsTailCallRelease values. - if (FirstRelease) { - ReleasesToMove.ReleaseMetadata = - NewRetainReleaseRRI.ReleaseMetadata; - ReleasesToMove.IsTailCallRelease = - NewRetainReleaseRRI.IsTailCallRelease; - FirstRelease = false; - } else { - if (ReleasesToMove.ReleaseMetadata != - NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; - if (ReleasesToMove.IsTailCallRelease != - NewRetainReleaseRRI.IsTailCallRelease) - ReleasesToMove.IsTailCallRelease = false; - } - - // Collect the optimal insertion points. - if (!KnownSafe) - for (SmallPtrSet::const_iterator - RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), - RE = NewRetainReleaseRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (ReleasesToMove.ReverseInsertPts.insert(RIP)) - NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); - } - NewReleases.push_back(NewRetainRelease); - } - } - } - NewRetains.clear(); - if (NewReleases.empty()) break; - - // Back the other way. - for (SmallVectorImpl::const_iterator - NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { - Instruction *NewRelease = *NI; - DenseMap::const_iterator It = - Releases.find(NewRelease); - assert(It != Releases.end()); - const RRInfo &NewReleaseRRI = It->second; - KnownSafeBU &= NewReleaseRRI.KnownSafe; - for (SmallPtrSet::const_iterator - LI = NewReleaseRRI.Calls.begin(), - LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewReleaseRetain = *LI; - MapVector::const_iterator Jt = - Retains.find(NewReleaseRetain); - if (Jt == Retains.end()) - return false; - const RRInfo &NewReleaseRetainRRI = Jt->second; - assert(NewReleaseRetainRRI.Calls.count(NewRelease)); - if (RetainsToMove.Calls.insert(NewReleaseRetain)) { - unsigned PathCount = - BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); - OldDelta += PathCount; - OldCount += PathCount; - - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - return false; - - // Collect the optimal insertion points. 
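- // (Each insertion point is weighted by GetAllPathCount(), the number of
- // CFG paths through its block; NewDelta and NewCount accumulate these
- // weights so we can later check that the rewritten retain/release calls
- // remain balanced across all paths.)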
- if (!KnownSafe) - for (SmallPtrSet::const_iterator - RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), - RE = NewReleaseRetainRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (RetainsToMove.ReverseInsertPts.insert(RIP)) { - PathCount = BBStates[RIP->getParent()].GetAllPathCount(); - NewDelta += PathCount; - NewCount += PathCount; - } - } - NewRetains.push_back(NewReleaseRetain); - } - } - } - NewReleases.clear(); - if (NewRetains.empty()) break; - } - - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { - RetainsToMove.ReverseInsertPts.clear(); - ReleasesToMove.ReverseInsertPts.clear(); - NewCount = 0; - } else { - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. - if (NewDelta != 0) - return false; - } - - // Determine whether the original call points are balanced in the retain and - // release calls through the program. If not, conservatively don't touch - // them. - // TODO: It's theoretically possible to do code motion in this case, as - // long as the existing imbalances are maintained. - if (OldDelta != 0) - return false; - - Changed = true; - assert(OldCount != 0 && "Unreachable code?"); - NumRRs += OldCount - NewCount; - // Set to true if we completely removed any RR pairs. - AnyPairsCompletelyEliminated = NewCount == 0; - - // We can move calls! - return true; -} - -/// Identify pairings between the retains and releases, and delete and/or move -/// them. -bool -ObjCARCOpt::PerformCodePlacement(DenseMap - &BBStates, - MapVector &Retains, - DenseMap &Releases, - Module *M) { - bool AnyPairsCompletelyEliminated = false; - RRInfo RetainsToMove; - RRInfo ReleasesToMove; - SmallVector NewRetains; - SmallVector NewReleases; - SmallVector DeadInsts; - - // Visit each retain. - for (MapVector::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ++I) { - Value *V = I->first; - if (!V) continue; // blotted - - Instruction *Retain = cast(V); - - DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain - << "\n"); - - Value *Arg = GetObjCArg(Retain); - - // If the object being released is in static or stack storage, we know it's - // not being managed by ObjC reference counting, so we can delete pairs - // regardless of what possible decrements or uses lie between them. - bool KnownSafe = isa(Arg) || isa(Arg); - - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (const LoadInst *LI = dyn_cast(Arg)) - if (const GlobalVariable *GV = - dyn_cast( - StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) - if (GV->isConstant()) - KnownSafe = true; - - // Connect the dots between the top-down-collected RetainsToMove and - // bottom-up-collected ReleasesToMove to form sets of related calls. - NewRetains.push_back(Retain); - bool PerformMoveCalls = - ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains, - NewReleases, DeadInsts, RetainsToMove, - ReleasesToMove, Arg, KnownSafe, - AnyPairsCompletelyEliminated); - - if (PerformMoveCalls) { - // Ok, everything checks out and we're all set. Let's move/delete some - // code! - MoveCalls(Arg, RetainsToMove, ReleasesToMove, - Retains, Releases, DeadInsts, M); - } - - // Clean up state for next retain. 
- NewReleases.clear(); - NewRetains.clear(); - RetainsToMove.clear(); - ReleasesToMove.clear(); - } - - // Now that we're done moving everything, we can delete the newly dead - // instructions, as we no longer need them as insert points. - while (!DeadInsts.empty()) - EraseInstruction(DeadInsts.pop_back_val()); - - return AnyPairsCompletelyEliminated; -} - -/// Weak pointer optimizations. -void ObjCARCOpt::OptimizeWeakCalls(Function &F) { - // First, do memdep-style RLE and S2L optimizations. We can't use memdep - // itself because it uses AliasAnalysis and we need to do provenance - // queries instead. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << - "\n"); - - InstructionClass Class = GetBasicInstructionClass(Inst); - if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) - continue; - - // Delete objc_loadWeak calls with no users. - if (Class == IC_LoadWeak && Inst->use_empty()) { - Inst->eraseFromParent(); - continue; - } - - // TODO: For now, just look for an earlier available version of this value - // within the same block. Theoretically, we could do memdep-style non-local - // analysis too, but that would want caching. A better approach would be to - // use the technique that EarlyCSE uses. - inst_iterator Current = llvm::prior(I); - BasicBlock *CurrentBB = Current.getBasicBlockIterator(); - for (BasicBlock::iterator B = CurrentBB->begin(), - J = Current.getInstructionIterator(); - J != B; --J) { - Instruction *EarlierInst = &*llvm::prior(J); - InstructionClass EarlierClass = GetInstructionClass(EarlierInst); - switch (EarlierClass) { - case IC_LoadWeak: - case IC_LoadWeakRetained: { - // If this is loading from the same pointer, replace this load's value - // with that one. - CallInst *Call = cast(Inst); - CallInst *EarlierCall = cast(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. - Call->replaceAllUsesWith(EarlierCall); - Call->eraseFromParent(); - goto clobbered; - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - goto clobbered; - case AliasAnalysis::NoAlias: - break; - } - break; - } - case IC_StoreWeak: - case IC_InitWeak: { - // If this is storing to the same pointer and has the same size etc. - // replace this load's value with the stored value. - CallInst *Call = cast(Inst); - CallInst *EarlierCall = cast(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. 
- Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
- Call->eraseFromParent();
- goto clobbered;
- case AliasAnalysis::MayAlias:
- case AliasAnalysis::PartialAlias:
- goto clobbered;
- case AliasAnalysis::NoAlias:
- break;
- }
- break;
- }
- case IC_MoveWeak:
- case IC_CopyWeak:
- // TODO: Grab the copied value.
- goto clobbered;
- case IC_AutoreleasepoolPush:
- case IC_None:
- case IC_User:
- // Weak pointers are only modified through the weak entry points
- // (and arbitrary calls, which could call the weak entry points).
- break;
- default:
- // Anything else could modify the weak pointer.
- goto clobbered;
- }
- }
- clobbered:;
- }
-
- // Then, for each destroyWeak with an alloca operand, check to see if
- // the alloca and all its users can be zapped.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_DestroyWeak)
- continue;
-
- CallInst *Call = cast<CallInst>(Inst);
- Value *Arg = Call->getArgOperand(0);
- if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ++UI) {
- const Instruction *UserInst = cast<Instruction>(*UI);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- case IC_DestroyWeak:
- continue;
- default:
- goto done;
- }
- }
- Changed = true;
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ) {
- CallInst *UserInst = cast<CallInst>(*UI++);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- // These functions return their second argument.
- UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
- break;
- case IC_DestroyWeak:
- // No return value.
- break;
- default:
- llvm_unreachable("alloca really is used!");
- }
- UserInst->eraseFromParent();
- }
- Alloca->eraseFromParent();
- done:;
- }
- }
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
-}
-
-/// Identify program paths which execute sequences of retains and releases which
-/// can be eliminated.
-bool ObjCARCOpt::OptimizeSequences(Function &F) {
- /// Releases, Retains - These are used to store the results of the main flow
- /// analysis. These use Value* as the key instead of Instruction* so that the
- /// map stays valid when we get around to rewriting code and calls get
- /// replaced by arguments.
- DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
-
- /// This is used during the traversal of the function to track the
- /// states for each identified object at each block.
- DenseMap<const BasicBlock *, BBState> BBStates;
-
- // Analyze the CFG of the function, and all instructions.
- bool NestingDetected = Visit(F, BBStates, Retains, Releases);
-
- // Transform.
- return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
- NestingDetected;
-}
-
-/// Look for this pattern:
-/// \code
-/// %call = call i8* @something(...)
-/// %2 = call i8* @objc_retain(i8* %call)
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// And delete the retain and autorelease.
-///
-/// Otherwise if it's just this:
-/// \code
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// convert the autorelease to autoreleaseRV.
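-/// The converted form is, roughly:
-/// \code
-/// %3 = call i8* @objc_autoreleaseReturnValue(i8* %2)
-/// ret i8* %3
-/// \endcode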
-void ObjCARCOpt::OptimizeReturns(Function &F) {
- if (!F.getReturnType()->isPointerTy())
- return;
-
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- BasicBlock *BB = FI;
- ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
-
- if (!Ret) continue;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
-
- const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
- FindDependencies(NeedsPositiveRetainCount, Arg,
- BB, Ret, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- if (!Autorelease)
- goto next_block;
- InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
- if (!IsAutorelease(AutoreleaseClass))
- goto next_block;
- if (GetObjCArg(Autorelease) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Check that there is nothing that can affect the reference
- // count between the autorelease and the retain.
- FindDependencies(CanChangeRetainCount, Arg,
- BB, Autorelease, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that we found a retain with the same argument.
- if (!Retain ||
- !IsRetain(GetBasicInstructionClass(Retain)) ||
- GetObjCArg(Retain) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Convert the autorelease to an autoreleaseRV, since it's
- // returning the value.
- if (AutoreleaseClass == IC_Autorelease) {
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease "
- "=> autoreleaseRV since it's returning a value.\n"
- " In: " << *Autorelease
- << "\n");
- Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
- DEBUG(dbgs() << " Out: " << *Autorelease
- << "\n");
- Autorelease->setTailCall(); // Always tail call autoreleaseRV.
- AutoreleaseClass = IC_AutoreleaseRV;
- }
-
- // Check that there is nothing that can affect the reference
- // count between the retain and the call.
- // Note that Retain need not be in BB.
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
- DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that the pointer is the return value of the call.
- if (!Call || Arg != Call)
- goto next_block;
-
- // Check that the call is a regular call.
- InstructionClass Class = GetBasicInstructionClass(Call);
- if (Class != IC_CallOrUser && Class != IC_Call)
- goto next_block;
-
- // If so, we can zap the retain and autorelease.
- Changed = true;
- ++NumRets;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
- << "\n Erasing: "
- << *Autorelease << "\n");
- EraseInstruction(Retain);
- EraseInstruction(Autorelease);
- }
- }
- }
-
- next_block:
- DependingInstructions.clear();
- Visited.clear();
- }
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
-
-}
-
-bool ObjCARCOpt::doInitialization(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- // Identify the imprecise release metadata kind.
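- // (In IR, such release calls carry the tag as instruction metadata,
- // e.g.: call void @objc_release(i8* %x), !clang.imprecise_release !0
- // -- an illustrative snippet.)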
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-
- // Intuitively, objc_retain and others are nocapture, however in practice
- // they are not, because they return their argument value. And objc_release
- // calls finalizers which can have arbitrary side effects.
-
- // These are initialized lazily.
- RetainRVCallee = 0;
- AutoreleaseRVCallee = 0;
- ReleaseCallee = 0;
- RetainCallee = 0;
- RetainBlockCallee = 0;
- AutoreleaseCallee = 0;
-
- return false;
-}
-
-bool ObjCARCOpt::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
-
- DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // This pass performs several distinct transformations. As a compile-time aid
- // when compiling code that isn't ObjC, skip these if the relevant ObjC
- // library functions aren't declared.
-
- // Preliminary optimizations. This also computes UsedInThisFunction.
- OptimizeIndividualCalls(F);
-
- // Optimizations for weak pointers.
- if (UsedInThisFunction & ((1 << IC_LoadWeak) |
- (1 << IC_LoadWeakRetained) |
- (1 << IC_StoreWeak) |
- (1 << IC_InitWeak) |
- (1 << IC_CopyWeak) |
- (1 << IC_MoveWeak) |
- (1 << IC_DestroyWeak)))
- OptimizeWeakCalls(F);
-
- // Optimizations for retain+release pairs.
- if (UsedInThisFunction & ((1 << IC_Retain) |
- (1 << IC_RetainRV) |
- (1 << IC_RetainBlock)))
- if (UsedInThisFunction & (1 << IC_Release))
- // Run OptimizeSequences until it either stops making changes or
- // no retain+release pair nesting is detected.
- while (OptimizeSequences(F)) {}
-
- // Optimizations if objc_autorelease is used.
- if (UsedInThisFunction & ((1 << IC_Autorelease) |
- (1 << IC_AutoreleaseRV)))
- OptimizeReturns(F);
-
- DEBUG(dbgs() << "\n");
-
- return Changed;
-}
-
-void ObjCARCOpt::releaseMemory() {
- PA.clear();
-}
-
-/// @}
-///
-/// \defgroup ARCContract ARC Contraction.
-/// @{
-
-// TODO: ObjCARCContract could insert PHI nodes when uses aren't
-// dominated by single calls.
-
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Operator.h"
-
-STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
-
-namespace {
- /// \brief Late ARC optimizations
- ///
- /// These change the IR in a way that makes it difficult to be analyzed by
- /// ObjCARCOpt, so this pass is run late.
- class ObjCARCContract : public FunctionPass {
- bool Changed;
- AliasAnalysis *AA;
- DominatorTree *DT;
- ProvenanceAnalysis PA;
-
- /// A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// Declarations for ObjC runtime functions, for use in creating calls to
- /// them. These are initialized lazily to avoid cluttering up the Module
- /// with unused declarations.
-
- /// Declaration for objc_storeStrong().
- Constant *StoreStrongCallee;
- /// Declaration for objc_retainAutorelease().
- Constant *RetainAutoreleaseCallee;
- /// Declaration for objc_retainAutoreleaseReturnValue().
- Constant *RetainAutoreleaseRVCallee;
-
- /// The inline asm string to insert between calls and RetainRV calls to make
- /// the optimization work on targets which need it.
- const MDString *RetainRVMarker;
-
- /// The set of inserted objc_storeStrong calls.
If at the end of walking the - /// function we have found no alloca instructions, these calls can be marked - /// "tail". - SmallPtrSet StoreStrongCalls; - - Constant *getStoreStrongCallee(Module *M); - Constant *getRetainAutoreleaseCallee(Module *M); - Constant *getRetainAutoreleaseRVCallee(Module *M); - - bool ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet - &DependingInstructions, - SmallPtrSet - &Visited); - - void ContractRelease(Instruction *Release, - inst_iterator &Iter); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - public: - static char ID; - ObjCARCContract() : FunctionPass(ID) { - initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCContract::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_END(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) - -Pass *llvm::createObjCARCContractPass() { - return new ObjCARCContract(); -} - -void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.setPreservesCFG(); -} - -Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { - if (!StoreStrongCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = { I8XX, I8X }; - - AttributeSet Attr = AttributeSet() - .addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind) - .addAttribute(M->getContext(), 1, Attribute::NoCapture); - - StoreStrongCallee = - M->getOrInsertFunction( - "objc_storeStrong", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attr); - } - return StoreStrongCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { - if (!RetainAutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainAutoreleaseCallee = - M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); - } - return RetainAutoreleaseCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { - if (!RetainAutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainAutoreleaseRVCallee = - M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, - Attribute); - } - return RetainAutoreleaseRVCallee; -} - -/// Merge an autorelease with a retain into a fused call. 
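-/// Schematically, for the non-RV case (the RV case fuses to
-/// objc_retainAutoreleaseReturnValue instead):
-/// \code
-/// %2 = call i8* @objc_retain(i8* %x)
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// \endcode
-/// becomes
-/// \code
-/// %2 = call i8* @objc_retainAutorelease(i8* %x)
-/// \endcode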
-bool -ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet - &DependingInstructions, - SmallPtrSet - &Visited) { - const Value *Arg = GetObjCArg(Autorelease); - - // Check that there are no instructions between the retain and the autorelease - // (such as an autorelease_pop) which may change the count. - CallInst *Retain = 0; - if (Class == IC_AutoreleaseRV) - FindDependencies(RetainAutoreleaseRVDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - else - FindDependencies(RetainAutoreleaseDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - - Visited.clear(); - if (DependingInstructions.size() != 1) { - DependingInstructions.clear(); - return false; - } - - Retain = dyn_cast_or_null(*DependingInstructions.begin()); - DependingInstructions.clear(); - - if (!Retain || - GetBasicInstructionClass(Retain) != IC_Retain || - GetObjCArg(Retain) != Arg) - return false; - - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing " - "retain/autorelease. Erasing: " << *Autorelease << "\n" - " Old Retain: " - << *Retain << "\n"); - - if (Class == IC_AutoreleaseRV) - Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); - else - Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); - - DEBUG(dbgs() << " New Retain: " - << *Retain << "\n"); - - EraseInstruction(Autorelease); - return true; -} - -/// Attempt to merge an objc_release with a store, load, and objc_retain to form -/// an objc_storeStrong. This can be a little tricky because the instructions -/// don't always appear in order, and there may be unrelated intervening -/// instructions. -void ObjCARCContract::ContractRelease(Instruction *Release, - inst_iterator &Iter) { - LoadInst *Load = dyn_cast(GetObjCArg(Release)); - if (!Load || !Load->isSimple()) return; - - // For now, require everything to be in one basic block. - BasicBlock *BB = Release->getParent(); - if (Load->getParent() != BB) return; - - // Walk down to find the store and the release, which may be in either order. - BasicBlock::iterator I = Load, End = BB->end(); - ++I; - AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; - bool SawRelease = false; - for (; !Store || !SawRelease; ++I) { - if (I == End) - return; - - Instruction *Inst = I; - if (Inst == Release) { - SawRelease = true; - continue; - } - - InstructionClass Class = GetBasicInstructionClass(Inst); - - // Unrelated retains are harmless. - if (IsRetain(Class)) - continue; - - if (Store) { - // The store is the point where we're going to put the objc_storeStrong, - // so make sure there are no uses after it. - if (CanUse(Inst, Load, PA, Class)) - return; - } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { - // We are moving the load down to the store, so check for anything - // else which writes to the memory between the load and the store. - Store = dyn_cast(Inst); - if (!Store || !Store->isSimple()) return; - if (Store->getPointerOperand() != Loc.Ptr) return; - } - } - - Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); - - // Walk up to find the retain. 
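- // (Once the retain is found, the overall sequence being fused is,
- // schematically:
- //   %new = call i8* @objc_retain(i8* %v)
- //   %old = load i8** %ptr
- //   store i8* %new, i8** %ptr
- //   call void @objc_release(i8* %old)
- // which becomes:
- //   call void @objc_storeStrong(i8** %ptr, i8* %new)
- // -- the store and the release may appear in either order.)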
- I = Store; - BasicBlock::iterator Begin = BB->begin(); - while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) - --I; - Instruction *Retain = I; - if (GetBasicInstructionClass(Retain) != IC_Retain) return; - if (GetObjCArg(Retain) != New) return; - - Changed = true; - ++NumStoreStrongs; - - LLVMContext &C = Release->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - - Value *Args[] = { Load->getPointerOperand(), New }; - if (Args[0]->getType() != I8XX) - Args[0] = new BitCastInst(Args[0], I8XX, "", Store); - if (Args[1]->getType() != I8X) - Args[1] = new BitCastInst(Args[1], I8X, "", Store); - CallInst *StoreStrong = - CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), - Args, "", Store); - StoreStrong->setDoesNotThrow(); - StoreStrong->setDebugLoc(Store->getDebugLoc()); - - // We can't set the tail flag yet, because we haven't yet determined - // whether there are any escaping allocas. Remember this call, so that - // we can set the tail flag once we know it's safe. - StoreStrongCalls.insert(StoreStrong); - - if (&*Iter == Store) ++Iter; - Store->eraseFromParent(); - Release->eraseFromParent(); - EraseInstruction(Retain); - if (Load->use_empty()) - Load->eraseFromParent(); -} - -bool ObjCARCContract::doInitialization(Module &M) { - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // These are initialized lazily. - StoreStrongCallee = 0; - RetainAutoreleaseCallee = 0; - RetainAutoreleaseRVCallee = 0; - - // Initialize RetainRVMarker. - RetainRVMarker = 0; - if (NamedMDNode *NMD = - M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) - if (NMD->getNumOperands() == 1) { - const MDNode *N = NMD->getOperand(0); - if (N->getNumOperands() == 1) - if (const MDString *S = dyn_cast(N->getOperand(0))) - RetainRVMarker = S; - } - - return false; -} - -bool ObjCARCContract::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - AA = &getAnalysis(); - DT = &getAnalysis(); - - PA.setAA(&getAnalysis()); - - // Track whether it's ok to mark objc_storeStrong calls with the "tail" - // keyword. Be conservative if the function has variadic arguments. - // It seems that functions which "return twice" are also unsafe for the - // "tail" argument, because they are setjmp, which could need to - // return to an earlier stack state. - bool TailOkForStoreStrongs = !F.isVarArg() && - !F.callsFunctionThatReturnsTwice(); - - // For ObjC library calls which return their argument, replace uses of the - // argument with uses of the call return value, if it dominates the use. This - // reduces register pressure. - SmallPtrSet DependingInstructions; - SmallPtrSet Visited; - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); - - // Only these library routines return their argument. In particular, - // objc_retainBlock does not necessarily return its argument. 
- InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - break; - case IC_Autorelease: - case IC_AutoreleaseRV: - if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) - continue; - break; - case IC_RetainRV: { - // If we're compiling for a target which needs a special inline-asm - // marker to do the retainAutoreleasedReturnValue optimization, - // insert it now. - if (!RetainRVMarker) - break; - BasicBlock::iterator BBI = Inst; - BasicBlock *InstParent = Inst->getParent(); - - // Step up to see if the call immediately precedes the RetainRV call. - // If it's an invoke, we have to cross a block boundary. And we have - // to carefully dodge no-op instructions. - do { - if (&*BBI == InstParent->begin()) { - BasicBlock *Pred = InstParent->getSinglePredecessor(); - if (!Pred) - goto decline_rv_optimization; - BBI = Pred->getTerminator(); - break; - } - --BBI; - } while (isNoopInstruction(BBI)); - - if (&*BBI == GetObjCArg(Inst)) { - DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " - "retainAutoreleasedReturnValue optimization.\n"); - Changed = true; - InlineAsm *IA = - InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), - /*isVarArg=*/false), - RetainRVMarker->getString(), - /*Constraints=*/"", /*hasSideEffects=*/true); - CallInst::Create(IA, "", Inst); - } - decline_rv_optimization: - break; - } - case IC_InitWeak: { - // objc_initWeak(p, null) => *p = null - CallInst *CI = cast(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { - Value *Null = - ConstantPointerNull::get(cast(CI->getType())); - Changed = true; - new StoreInst(Null, CI->getArgOperand(0), CI); - - DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" - << " New = " << *Null << "\n"); - - CI->replaceAllUsesWith(Null); - CI->eraseFromParent(); - } - continue; - } - case IC_Release: - ContractRelease(Inst, I); - continue; - case IC_User: - // Be conservative if the function has any alloca instructions. - // Technically we only care about escaping alloca instructions, - // but this is sufficient to handle some interesting cases. - if (isa(Inst)) - TailOkForStoreStrongs = false; - continue; - default: - continue; - } - - DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); - - // Don't use GetObjCArg because we don't want to look through bitcasts - // and such; to do the replacement, the argument must have type i8*. - const Value *Arg = cast(Inst)->getArgOperand(0); - for (;;) { - // If we're compiling bugpointed code, don't get in trouble. - if (!isa(Arg) && !isa(Arg)) - break; - // Look through the uses of the pointer. - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ) { - Use &U = UI.getUse(); - unsigned OperandNo = UI.getOperandNo(); - ++UI; // Increment UI now, because we may unlink its element. - - // If the call's return value dominates a use of the call's argument - // value, rewrite the use to use the return value. We check for - // reachability here because an unreachable call is considered to - // trivially dominate itself, which would lead us to rewriting its - // argument in terms of its return value, which would lead to - // infinite loops in GetObjCArg. 
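- // (E.g. given "%r = call i8* @objc_retain(i8* %x)", any use of %x that
- // the call dominates is rewritten to use %r instead -- an illustrative
- // case.)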
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { - Changed = true; - Instruction *Replacement = Inst; - Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast(U.getUser())) { - // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = PHI->getIncomingBlock(ValNo); - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - &BB->back()); - // While we're here, rewrite all edges for this PHI, rather - // than just one use at a time, to minimize the number of - // bitcasts we emit. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingBlock(i) == BB) { - // Keep the UI iterator valid. - if (&PHI->getOperandUse( - PHINode::getOperandNumForIncomingValue(i)) == - &UI.getUse()) - ++UI; - PHI->setIncomingValue(i, Replacement); - } - } else { - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - cast(U.getUser())); - U.set(Replacement); - } - } - } - - // If Arg is a no-op casted pointer, strip one level of casts and iterate. - if (const BitCastInst *BI = dyn_cast(Arg)) - Arg = BI->getOperand(0); - else if (isa(Arg) && - cast(Arg)->hasAllZeroIndices()) - Arg = cast(Arg)->getPointerOperand(); - else if (isa(Arg) && - !cast(Arg)->mayBeOverridden()) - Arg = cast(Arg)->getAliasee(); - else - break; - } - } - - // If this function has no escaping allocas or suspicious vararg usage, - // objc_storeStrong calls can be marked with the "tail" keyword. - if (TailOkForStoreStrongs) - for (SmallPtrSet::iterator I = StoreStrongCalls.begin(), - E = StoreStrongCalls.end(); I != E; ++I) - (*I)->setTailCall(); - StoreStrongCalls.clear(); - - return Changed; -} - -/// @} -/// diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 35d2fa0..8a9c7da 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -50,11 +50,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); - initializeObjCARCAliasAnalysisPass(Registry); - initializeObjCARCAPElimPass(Registry); - initializeObjCARCExpandPass(Registry); - initializeObjCARCContractPass(Registry); - initializeObjCARCOptPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); -- cgit v1.1 From bc49cf73079c1223fba5046047517fc3c00d5284 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 28 Jan 2013 02:46:49 +0000 Subject: Make some code a little simpler. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173649 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 78 ++++++++++++++++++------------------ lib/Target/Mips/MipsISelLowering.h | 2 + 2 files changed, 40 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 0551bb4..de0fae7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -222,47 +222,45 @@ namespace { } +void MipsTargetLowering::SetMips16LibcallName + (RTLIB::Libcall l, const char *Name) { + setLibcallName(l, Name); + noHelperNeeded.insert(Name); +} + void MipsTargetLowering::setMips16HardFloatLibCalls() { - setLibcallName(RTLIB::ADD_F32, addToNoHelperNeeded("__mips16_addsf3")); - setLibcallName(RTLIB::ADD_F64, addToNoHelperNeeded("__mips16_adddf3")); - setLibcallName(RTLIB::SUB_F32, addToNoHelperNeeded("__mips16_subsf3")); - setLibcallName(RTLIB::SUB_F64, addToNoHelperNeeded("__mips16_subdf3")); - setLibcallName(RTLIB::MUL_F32, addToNoHelperNeeded("__mips16_mulsf3")); - setLibcallName(RTLIB::MUL_F64, addToNoHelperNeeded("__mips16_muldf3")); - setLibcallName(RTLIB::DIV_F32, addToNoHelperNeeded("__mips16_divsf3")); - setLibcallName(RTLIB::DIV_F64, addToNoHelperNeeded("__mips16_divdf3")); - setLibcallName(RTLIB::FPEXT_F32_F64, - addToNoHelperNeeded("__mips16_extendsfdf2")); - setLibcallName(RTLIB::FPROUND_F64_F32, - addToNoHelperNeeded("__mips16_truncdfsf2")); - setLibcallName(RTLIB::FPTOSINT_F32_I32, - addToNoHelperNeeded("__mips16_fix_truncsfsi")); - setLibcallName(RTLIB::FPTOSINT_F64_I32, - addToNoHelperNeeded("__mips16_fix_truncdfsi")); - setLibcallName(RTLIB::SINTTOFP_I32_F32, - addToNoHelperNeeded("__mips16_floatsisf")); - setLibcallName(RTLIB::SINTTOFP_I32_F64, - addToNoHelperNeeded("__mips16_floatsidf")); - setLibcallName(RTLIB::UINTTOFP_I32_F32, - addToNoHelperNeeded("__mips16_floatunsisf")); - setLibcallName(RTLIB::UINTTOFP_I32_F64, - addToNoHelperNeeded("__mips16_floatunsidf")); - setLibcallName(RTLIB::OEQ_F32, addToNoHelperNeeded("__mips16_eqsf2")); - setLibcallName(RTLIB::OEQ_F64, addToNoHelperNeeded("__mips16_eqdf2")); - setLibcallName(RTLIB::UNE_F32, addToNoHelperNeeded("__mips16_nesf2")); - setLibcallName(RTLIB::UNE_F64, addToNoHelperNeeded("__mips16_nedf2")); - setLibcallName(RTLIB::OGE_F32, addToNoHelperNeeded("__mips16_gesf2")); - setLibcallName(RTLIB::OGE_F64, addToNoHelperNeeded("__mips16_gedf2")); - setLibcallName(RTLIB::OLT_F32, addToNoHelperNeeded("__mips16_ltsf2")); - setLibcallName(RTLIB::OLT_F64, addToNoHelperNeeded("__mips16_ltdf2")); - setLibcallName(RTLIB::OLE_F32, addToNoHelperNeeded("__mips16_lesf2")); - setLibcallName(RTLIB::OLE_F64, addToNoHelperNeeded("__mips16_ledf2")); - setLibcallName(RTLIB::OGT_F32, addToNoHelperNeeded("__mips16_gtsf2")); - setLibcallName(RTLIB::OGT_F64, addToNoHelperNeeded("__mips16_gtdf2")); - setLibcallName(RTLIB::UO_F32, addToNoHelperNeeded("__mips16_unordsf2")); - setLibcallName(RTLIB::UO_F64, addToNoHelperNeeded("__mips16_unorddf2")); - setLibcallName(RTLIB::O_F32, addToNoHelperNeeded("__mips16_unordsf2")); - setLibcallName(RTLIB::O_F64, addToNoHelperNeeded("__mips16_unorddf2")); + SetMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + SetMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + SetMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + SetMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + SetMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + 
SetMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + SetMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + SetMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + SetMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + SetMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + SetMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + SetMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + SetMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + SetMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + SetMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + SetMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + SetMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + SetMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + SetMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + SetMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + SetMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + SetMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + SetMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + SetMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + SetMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + SetMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + SetMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + SetMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + SetMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + SetMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + SetMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + SetMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); } MipsTargetLowering:: diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index c682c89..053f8de 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -176,6 +176,8 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: + void SetMips16LibcallName(RTLIB::Libcall, const char *Name); + void setMips16HardFloatLibCalls(); unsigned int -- cgit v1.1 From 6504255a2257c53df8ab191c1db4517139f5dc8c Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 28 Jan 2013 03:28:38 +0000 Subject: Extracted pass ObjCARCExpand from ObjCARC.cpp => ObjCARCExpand.cpp. I also added the local header ObjCARC.h for common functions used by the various passes. 
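In sketch form, each extracted pass now reduces to the same skeleton (a
condensed outline of what the new files below spell out in full; only
names that appear in this patch are used):

    #define DEBUG_TYPE "objc-arc-expand"
    #include "ObjCARC.h"   // shared InstructionClass enum and helpers

    using namespace llvm;
    using namespace llvm::objcarc;

    bool ObjCARCExpand::doInitialization(Module &M) {
      // Cheap module-level gate from the shared header: skip all work
      // when no ARC runtime entry points are referenced.
      Run = ModuleHasARC(M);
      return false;
    }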
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173651 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/CMakeLists.txt    |   1 +
 lib/Transforms/ObjCARC/ObjCARC.cpp       |  12 +-
 lib/Transforms/ObjCARC/ObjCARC.h         | 242 +++++++++++++++++++++++++
 lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 113 ++++++++++++
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp   | 301 +------------------------------
 5 files changed, 370 insertions(+), 299 deletions(-)
 create mode 100644 lib/Transforms/ObjCARC/ObjCARC.h
 create mode 100644 lib/Transforms/ObjCARC/ObjCARCExpand.cpp
(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
index ab17267..3bb362f 100644
--- a/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMObjCARCOpts
   ObjCARC.cpp
   ObjCARCOpts.cpp
+  ObjCARCExpand.cpp
   )
 
 add_dependencies(LLVMObjCARCOpts intrinsics_gen)
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index d4e3149..38adfa3 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -13,15 +13,25 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/ObjCARC.h"
+#include "ObjCARC.h"
+
 #include "llvm-c/Initialization.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
+#include "llvm/Support/Commandline.h"
 
 using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+                       cl::location(EnableARCOpts),
+                       cl::init(true));
 
 /// initializeObjCARCOptsPasses - Initialize all passes linked into the
 /// ObjCARCOpts library.
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
new file mode 100644
index 0000000..ac04cad
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -0,0 +1,242 @@
+//===- ObjCARC.h - ObjC ARC Optimization --------------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines common definitions/declarations used by the ObjC ARC
+/// Optimizer. ARC stands for Automatic Reference Counting and is a system for
+/// managing reference counts for objects in Objective C.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_OBJCARC_H +#define LLVM_TRANSFORMS_SCALAR_OBJCARC_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/ObjCARC.h" + +namespace llvm { +namespace objcarc { + +/// \brief A handy option to enable/disable all ARC Optimizations. +extern bool EnableARCOpts; + +/// \brief Test if the given module looks interesting to run ARC optimization +/// on. +static inline bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer"); +} + +/// \enum InstructionClass +/// \brief A simple classification for instructions. +enum InstructionClass { + IC_Retain, ///< objc_retain + IC_RetainRV, ///< objc_retainAutoreleasedReturnValue + IC_RetainBlock, ///< objc_retainBlock + IC_Release, ///< objc_release + IC_Autorelease, ///< objc_autorelease + IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue + IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush + IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop + IC_NoopCast, ///< objc_retainedObject, etc. 
+  IC_FusedRetainAutorelease,   ///< objc_retainAutorelease
+  IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+  IC_LoadWeakRetained,         ///< objc_loadWeakRetained (primitive)
+  IC_StoreWeak,                ///< objc_storeWeak (primitive)
+  IC_InitWeak,                 ///< objc_initWeak (derived)
+  IC_LoadWeak,                 ///< objc_loadWeak (derived)
+  IC_MoveWeak,                 ///< objc_moveWeak (derived)
+  IC_CopyWeak,                 ///< objc_copyWeak (derived)
+  IC_DestroyWeak,              ///< objc_destroyWeak (derived)
+  IC_StoreStrong,              ///< objc_storeStrong (derived)
+  IC_CallOrUser,               ///< could call objc_release and/or "use" pointers
+  IC_Call,                     ///< could call objc_release
+  IC_User,                     ///< could "use" a pointer
+  IC_None                      ///< anything else
+};
+
+static raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class)
+  LLVM_ATTRIBUTE_USED;
+
+static raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) {
+  switch (Class) {
+  case IC_Retain:
+    return OS << "IC_Retain";
+  case IC_RetainRV:
+    return OS << "IC_RetainRV";
+  case IC_RetainBlock:
+    return OS << "IC_RetainBlock";
+  case IC_Release:
+    return OS << "IC_Release";
+  case IC_Autorelease:
+    return OS << "IC_Autorelease";
+  case IC_AutoreleaseRV:
+    return OS << "IC_AutoreleaseRV";
+  case IC_AutoreleasepoolPush:
+    return OS << "IC_AutoreleasepoolPush";
+  case IC_AutoreleasepoolPop:
+    return OS << "IC_AutoreleasepoolPop";
+  case IC_NoopCast:
+    return OS << "IC_NoopCast";
+  case IC_FusedRetainAutorelease:
+    return OS << "IC_FusedRetainAutorelease";
+  case IC_FusedRetainAutoreleaseRV:
+    return OS << "IC_FusedRetainAutoreleaseRV";
+  case IC_LoadWeakRetained:
+    return OS << "IC_LoadWeakRetained";
+  case IC_StoreWeak:
+    return OS << "IC_StoreWeak";
+  case IC_InitWeak:
+    return OS << "IC_InitWeak";
+  case IC_LoadWeak:
+    return OS << "IC_LoadWeak";
+  case IC_MoveWeak:
+    return OS << "IC_MoveWeak";
+  case IC_CopyWeak:
+    return OS << "IC_CopyWeak";
+  case IC_DestroyWeak:
+    return OS << "IC_DestroyWeak";
+  case IC_StoreStrong:
+    return OS << "IC_StoreStrong";
+  case IC_CallOrUser:
+    return OS << "IC_CallOrUser";
+  case IC_Call:
+    return OS << "IC_Call";
+  case IC_User:
+    return OS << "IC_User";
+  case IC_None:
+    return OS << "IC_None";
+  }
+  llvm_unreachable("Unknown instruction class!");
+}
+
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return IC_CallOrUser.
+static inline InstructionClass GetFunctionClass(const Function *F) {
+  Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+  // No arguments.
+  if (AI == AE)
+    return StringSwitch<InstructionClass>(F->getName())
+      .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+      .Default(IC_CallOrUser);
+
+  // One argument.
+  const Argument *A0 = AI++;
+  if (AI == AE)
+    // Argument is a pointer.
+    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+      Type *ETy = PTy->getElementType();
+      // Argument is i8*.
+      if (ETy->isIntegerTy(8))
+        return StringSwitch<InstructionClass>(F->getName())
+          .Case("objc_retain", IC_Retain)
+          .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+          .Case("objc_retainBlock", IC_RetainBlock)
+          .Case("objc_release", IC_Release)
+          .Case("objc_autorelease", IC_Autorelease)
+          .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+          .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+          .Case("objc_retainedObject", IC_NoopCast)
+          .Case("objc_unretainedObject", IC_NoopCast)
+          .Case("objc_unretainedPointer", IC_NoopCast)
+          .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+          .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+          .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+          .Default(IC_CallOrUser);
+
+      // Argument is i8**
+      if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+        if (Pte->getElementType()->isIntegerTy(8))
+          return StringSwitch<InstructionClass>(F->getName())
+            .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+            .Case("objc_loadWeak", IC_LoadWeak)
+            .Case("objc_destroyWeak", IC_DestroyWeak)
+            .Default(IC_CallOrUser);
+    }
+
+  // Two arguments, first is i8**.
+  const Argument *A1 = AI++;
+  if (AI == AE)
+    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+      if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+        if (Pte->getElementType()->isIntegerTy(8))
+          if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+            Type *ETy1 = PTy1->getElementType();
+            // Second argument is i8*
+            if (ETy1->isIntegerTy(8))
+              return StringSwitch<InstructionClass>(F->getName())
+                .Case("objc_storeWeak", IC_StoreWeak)
+                .Case("objc_initWeak", IC_InitWeak)
+                .Case("objc_storeStrong", IC_StoreStrong)
+                .Default(IC_CallOrUser);
+            // Second argument is i8**.
+            if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+              if (Pte1->getElementType()->isIntegerTy(8))
+                return StringSwitch<InstructionClass>(F->getName())
+                  .Case("objc_moveWeak", IC_MoveWeak)
+                  .Case("objc_copyWeak", IC_CopyWeak)
+                  .Default(IC_CallOrUser);
+          }
+
+  // Anything else.
+  return IC_CallOrUser;
+}
+
+/// \brief Determine which objc runtime call instruction class V belongs to.
+///
+/// This is similar to GetInstructionClass except that it only detects objc
+/// runtime calls. This allows it to be faster.
+///
+static inline InstructionClass GetBasicInstructionClass(const Value *V) {
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (const Function *F = CI->getCalledFunction())
+      return GetFunctionClass(F);
+    // Otherwise, be conservative.
+    return IC_CallOrUser;
+  }
+
+  // Otherwise, be conservative.
+  return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
+}
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_OBJCARC_H
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
new file mode 100644
index 0000000..a65367a
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -0,0 +1,113 @@
+//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file deals with early optimizations which perform certain
+/// cleanup operations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-expand"
+#include "ObjCARC.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+  /// \brief Early ARC transformations.
+  class ObjCARCExpand : public FunctionPass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+    /// A flag indicating whether this optimization pass should run.
+    bool Run;
+
+  public:
+    static char ID;
+    ObjCARCExpand() : FunctionPass(ID) {
+      initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+                "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+  return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
+  bool Changed = false;
+
+  DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n");
+
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+    Instruction *Inst = &*I;
+
+    DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
+
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_Retain:
+    case IC_RetainRV:
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV: {
+      // These calls return their argument verbatim, as a low-level
+      // optimization. However, this makes high-level optimizations
+      // harder. Undo any uses of this optimization that the front-end
+      // emitted here. We'll redo them in the contract pass.
+      Changed = true;
+      Value *Value = cast<CallInst>(Inst)->getArgOperand(0);
+      DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n"
+                      " New = " << *Value << "\n");
+      Inst->replaceAllUsesWith(Value);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n");
+
+  return Changed;
+}
+
+/// @}
+///
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 411da64..f537d44 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -28,16 +28,12 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "objc-arc"
+#define DEBUG_TYPE "objc-arc-opts"
+#include "ObjCARC.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
-
-/// \brief A handy option to enable/disable all optimizations in this file.
-static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+using namespace llvm::objcarc;
 
 /// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
/// @{ @@ -132,97 +128,11 @@ namespace { /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. /// @{ -#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/Local.h" -namespace { - /// \enum InstructionClass - /// \brief A simple classification for instructions. - enum InstructionClass { - IC_Retain, ///< objc_retain - IC_RetainRV, ///< objc_retainAutoreleasedReturnValue - IC_RetainBlock, ///< objc_retainBlock - IC_Release, ///< objc_release - IC_Autorelease, ///< objc_autorelease - IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue - IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush - IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop - IC_NoopCast, ///< objc_retainedObject, etc. - IC_FusedRetainAutorelease, ///< objc_retainAutorelease - IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue - IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive) - IC_StoreWeak, ///< objc_storeWeak (primitive) - IC_InitWeak, ///< objc_initWeak (derived) - IC_LoadWeak, ///< objc_loadWeak (derived) - IC_MoveWeak, ///< objc_moveWeak (derived) - IC_CopyWeak, ///< objc_copyWeak (derived) - IC_DestroyWeak, ///< objc_destroyWeak (derived) - IC_StoreStrong, ///< objc_storeStrong (derived) - IC_CallOrUser, ///< could call objc_release and/or "use" pointers - IC_Call, ///< could call objc_release - IC_User, ///< could "use" a pointer - IC_None ///< anything else - }; - - raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) - LLVM_ATTRIBUTE_USED; - raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) { - switch (Class) { - case IC_Retain: - return OS << "IC_Retain"; - case IC_RetainRV: - return OS << "IC_RetainRV"; - case IC_RetainBlock: - return OS << "IC_RetainBlock"; - case IC_Release: - return OS << "IC_Release"; - case IC_Autorelease: - return OS << "IC_Autorelease"; - case IC_AutoreleaseRV: - return OS << "IC_AutoreleaseRV"; - case IC_AutoreleasepoolPush: - return OS << "IC_AutoreleasepoolPush"; - case IC_AutoreleasepoolPop: - return OS << "IC_AutoreleasepoolPop"; - case IC_NoopCast: - return OS << "IC_NoopCast"; - case IC_FusedRetainAutorelease: - return OS << "IC_FusedRetainAutorelease"; - case IC_FusedRetainAutoreleaseRV: - return OS << "IC_FusedRetainAutoreleaseRV"; - case IC_LoadWeakRetained: - return OS << "IC_LoadWeakRetained"; - case IC_StoreWeak: - return OS << "IC_StoreWeak"; - case IC_InitWeak: - return OS << "IC_InitWeak"; - case IC_LoadWeak: - return OS << "IC_LoadWeak"; - case IC_MoveWeak: - return OS << "IC_MoveWeak"; - case IC_CopyWeak: - return OS << "IC_CopyWeak"; - case IC_DestroyWeak: - return OS << "IC_DestroyWeak"; - case IC_StoreStrong: - return OS << "IC_StoreStrong"; - case IC_CallOrUser: - return OS << "IC_CallOrUser"; - case IC_Call: - return OS << "IC_Call"; - case IC_User: - return OS << "IC_User"; - case IC_None: - return OS << "IC_None"; - } - llvm_unreachable("Unknown instruction class!"); - } -} - /// \brief Test whether the given value is possible a retainable object pointer. static bool IsPotentialRetainableObjPtr(const Value *Op) { // Pointers to static or stack storage are not valid retainable object pointers. @@ -257,79 +167,6 @@ static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { return CS.onlyReadsMemory() ? IC_None : IC_Call; } -/// \brief Determine if F is one of the special known Functions. 
If it isn't,
-/// return IC_CallOrUser.
-static InstructionClass GetFunctionClass(const Function *F) {
-  Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
-  // No arguments.
-  if (AI == AE)
-    return StringSwitch<InstructionClass>(F->getName())
-      .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
-      .Default(IC_CallOrUser);
-
-  // One argument.
-  const Argument *A0 = AI++;
-  if (AI == AE)
-    // Argument is a pointer.
-    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
-      Type *ETy = PTy->getElementType();
-      // Argument is i8*.
-      if (ETy->isIntegerTy(8))
-        return StringSwitch<InstructionClass>(F->getName())
-          .Case("objc_retain", IC_Retain)
-          .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
-          .Case("objc_retainBlock", IC_RetainBlock)
-          .Case("objc_release", IC_Release)
-          .Case("objc_autorelease", IC_Autorelease)
-          .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
-          .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
-          .Case("objc_retainedObject", IC_NoopCast)
-          .Case("objc_unretainedObject", IC_NoopCast)
-          .Case("objc_unretainedPointer", IC_NoopCast)
-          .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
-          .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
-          .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
-          .Default(IC_CallOrUser);
-
-      // Argument is i8**
-      if (PointerType *Pte = dyn_cast<PointerType>(ETy))
-        if (Pte->getElementType()->isIntegerTy(8))
-          return StringSwitch<InstructionClass>(F->getName())
-            .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
-            .Case("objc_loadWeak", IC_LoadWeak)
-            .Case("objc_destroyWeak", IC_DestroyWeak)
-            .Default(IC_CallOrUser);
-    }
-
-  // Two arguments, first is i8**.
-  const Argument *A1 = AI++;
-  if (AI == AE)
-    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
-      if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
-        if (Pte->getElementType()->isIntegerTy(8))
-          if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
-            Type *ETy1 = PTy1->getElementType();
-            // Second argument is i8*
-            if (ETy1->isIntegerTy(8))
-              return StringSwitch<InstructionClass>(F->getName())
-                .Case("objc_storeWeak", IC_StoreWeak)
-                .Case("objc_initWeak", IC_InitWeak)
-                .Case("objc_storeStrong", IC_StoreStrong)
-                .Default(IC_CallOrUser);
-            // Second argument is i8**.
-            if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
-              if (Pte1->getElementType()->isIntegerTy(8))
-                return StringSwitch<InstructionClass>(F->getName())
-                  .Case("objc_moveWeak", IC_MoveWeak)
-                  .Case("objc_copyWeak", IC_CopyWeak)
-                  .Default(IC_CallOrUser);
-          }
-
-  // Anything else.
-  return IC_CallOrUser;
-}
-
 /// \brief Determine what kind of construct V is.
 static InstructionClass GetInstructionClass(const Value *V) {
   if (const Instruction *I = dyn_cast<Instruction>(V)) {
@@ -420,23 +257,6 @@ static InstructionClass GetInstructionClass(const Value *V) {
   return IC_None;
 }
 
-/// \brief Determine which objc runtime call instruction class V belongs to.
-///
-/// This is similar to GetInstructionClass except that it only detects objc
-/// runtime calls. This allows it to be faster.
-///
-static InstructionClass GetBasicInstructionClass(const Value *V) {
-  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
-    if (const Function *F = CI->getCalledFunction())
-      return GetFunctionClass(F);
-    // Otherwise, be conservative.
-    return IC_CallOrUser;
-  }
-
-  // Otherwise, be conservative.
-  return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
-}
-
 /// \brief Test if the given class is objc_retain or equivalent.
static bool IsRetain(InstructionClass Class) { return Class == IC_Retain || @@ -648,29 +468,6 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return 0; } -/// \brief Test if the given module looks interesting to run ARC optimization -/// on. -static bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); -} - /// \brief Test whether the given pointer, which is an Objective C block /// pointer, does not "escape". /// @@ -756,10 +553,6 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { /// \defgroup ARCAA Extends alias analysis using ObjC specific knowledge. /// @{ -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" - namespace { /// \brief This is a simple alias analysis implementation that uses knowledge /// of ARC constructs to answer queries. @@ -928,94 +721,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, /// @} /// -/// \defgroup ARCExpansion Early ARC Optimizations. -/// @{ - -#include "llvm/Support/InstIterator.h" -#include "llvm/Transforms/ObjCARC.h" - -namespace { - /// \brief Early ARC transformations. - class ObjCARCExpand : public FunctionPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - /// A flag indicating whether this optimization pass should run. - bool Run; - - public: - static char ID; - ObjCARCExpand() : FunctionPass(ID) { - initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCExpand::ID = 0; -INITIALIZE_PASS(ObjCARCExpand, - "objc-arc-expand", "ObjC ARC expansion", false, false) - -Pass *llvm::createObjCARCExpandPass() { - return new ObjCARCExpand(); -} - -void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -bool ObjCARCExpand::doInitialization(Module &M) { - Run = ModuleHasARC(M); - return false; -} - -bool ObjCARCExpand::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - bool Changed = false; - - DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n"); - - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - Instruction *Inst = &*I; - - DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n"); - - switch (GetBasicInstructionClass(Inst)) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: { - // These calls return their argument verbatim, as a low-level - // optimization. However, this makes high-level optimizations - // harder. Undo any uses of this optimization that the front-end - // emitted here. 
We'll redo them in the contract pass.
-        Changed = true;
-        Value *Value = cast<CallInst>(Inst)->getArgOperand(0);
-        DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n"
-                        " New = " << *Value << "\n");
-        Inst->replaceAllUsesWith(Value);
-        break;
-      }
-      default:
-        break;
-      }
-  }
-
-  DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n");
-
-  return Changed;
-}
-
-/// @}
-///
 /// \defgroup ARCAPElim ARC Autorelease Pool Elimination.
 /// @{
-- cgit v1.1


From 6313c99aa8a58a870f2b80de894eaf2e6bf96ee8 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Mon, 28 Jan 2013 03:30:34 +0000
Subject: Removed extraneous doxygen end module statement.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173652 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 2 --
 1 file changed, 2 deletions(-)
(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index a65367a..ffc5405 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -109,5 +109,3 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
 
   return Changed;
 }
-/// @}
-///
-- cgit v1.1


From 73104b7d7f6557cadf6f300d9d6cccced7ab2561 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Mon, 28 Jan 2013 03:35:20 +0000
Subject: Fixed case insensitive issue.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173653 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/ObjCARC.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index 38adfa3..dceb567 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -21,7 +21,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
-#include "llvm/Support/Commandline.h"
+#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 using namespace llvm::objcarc;
-- cgit v1.1


From 3c67f1cd94760f879e0ec5407f0d1078bc51f9b4 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Mon, 28 Jan 2013 04:12:07 +0000
Subject: Refactored out pass ObjCARCAPElim from ObjCARCOpts.cpp => ObjCARCAPElim.cpp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173654 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/CMakeLists.txt    |   1 +
 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 179 +++++++++++++++++++++++++++++++
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp   | 151 +------------------------------
 3 files changed, 183 insertions(+), 148 deletions(-)
 create mode 100644 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
index 3bb362f..a0bdb04 100644
--- a/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMObjCARCOpts
   ObjCARC.cpp
   ObjCARCOpts.cpp
   ObjCARCExpand.cpp
+  ObjCARCAPElim.cpp
   )
 
 add_dependencies(LLVMObjCARCOpts intrinsics_gen)
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
new file mode 100644
index 0000000..c849bcd
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -0,0 +1,179 @@
+//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// The optimizations performed include elimination of redundant, partially +/// redundant, and inconsequential reference count operations, elimination of +/// redundant weak pointer operations, pattern-matching and replacement of +/// low-level operations into higher-level operations, and numerous minor +/// simplifications. +/// +/// This file also defines a simple ARC-aware AliasAnalysis. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-ap-elim" +#include "ObjCARC.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; +using namespace llvm::objcarc; + +namespace { + /// \brief Autorelease pool elimination. + class ObjCARCAPElim : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + + static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); + static bool OptimizeBB(BasicBlock *BB); + + public: + static char ID; + ObjCARCAPElim() : ModulePass(ID) { + initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCAPElim::ID = 0; +INITIALIZE_PASS(ObjCARCAPElim, + "objc-arc-apelim", + "ObjC ARC autorelease pool elimination", + false, false) + +Pass *llvm::createObjCARCAPElimPass() { + return new ObjCARCAPElim(); +} + +void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +/// Interprocedurally determine if calls made by the given call site can +/// possibly produce autoreleases. +bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { + if (const Function *Callee = CS.getCalledFunction()) { + if (Callee->isDeclaration() || Callee->mayBeOverridden()) + return true; + for (Function::const_iterator I = Callee->begin(), E = Callee->end(); + I != E; ++I) { + const BasicBlock *BB = I; + for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); + J != F; ++J) + if (ImmutableCallSite JCS = ImmutableCallSite(J)) + // This recursion depth limit is arbitrary. It's just great + // enough to cover known interesting testcases. + if (Depth < 3 && + !JCS.onlyReadsMemory() && + MayAutorelease(JCS, Depth + 1)) + return true; + } + return false; + } + + return true; +} + +bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { + bool Changed = false; + + Instruction *Push = 0; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPush: + Push = Inst; + break; + case IC_AutoreleasepoolPop: + // If this pop matches a push and nothing in between can autorelease, + // zap the pair. 
+      if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+        Changed = true;
+        DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
+                        "autorelease pair:\n"
+                        " Pop: " << *Inst << "\n"
+                     << " Push: " << *Push << "\n");
+        Inst->eraseFromParent();
+        Push->eraseFromParent();
+      }
+      Push = 0;
+      break;
+    case IC_CallOrUser:
+      if (MayAutorelease(ImmutableCallSite(Inst)))
+        Push = 0;
+      break;
+    default:
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!ModuleHasARC(M))
+    return false;
+
+  // Find the llvm.global_ctors variable, as the first step in
+  // identifying the global constructors. In theory, unnecessary autorelease
+  // pools could occur anywhere, but in practice it's pretty rare. Global
+  // ctors are a place where autorelease pools get inserted automatically,
+  // so it's pretty common for them to be unnecessary, and it's pretty
+  // profitable to eliminate them.
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+  if (!GV)
+    return false;
+
+  assert(GV->hasDefinitiveInitializer() &&
+         "llvm.global_ctors is uncooperative!");
+
+  bool Changed = false;
+
+  // Dig the constructor functions out of GV's initializer.
+  ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+  for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+       OI != OE; ++OI) {
+    Value *Op = *OI;
+    // llvm.global_ctors is an array of pairs where the second members
+    // are constructor functions.
+    Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+    // If the user used a constructor function with the wrong signature and
+    // it got bitcasted or whatever, look the other way.
+    if (!F)
+      continue;
+    // Only look at function definitions.
+    if (F->isDeclaration())
+      continue;
+    // Only look at functions with one basic block.
+    if (llvm::next(F->begin()) != F->end())
+      continue;
+    // Ok, a single-block constructor function definition. Try to optimize it.
+    Changed |= OptimizeBB(F->begin());
+  }
+
+  return Changed;
+}
+
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index f537d44..900ad61 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -30,8 +30,11 @@
 
 #define DEBUG_TYPE "objc-arc-opts"
 #include "ObjCARC.h"
+
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+
 using namespace llvm;
 using namespace llvm::objcarc;
 
@@ -721,154 +724,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
 /// @}
 ///
-/// \defgroup ARCAPElim ARC Autorelease Pool Elimination.
-/// @{
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Constants.h"
-
-namespace {
-  /// \brief Autorelease pool elimination.
-  class ObjCARCAPElim : public ModulePass {
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual bool runOnModule(Module &M);
-
-    static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
-    static bool OptimizeBB(BasicBlock *BB);
-
-  public:
-    static char ID;
-    ObjCARCAPElim() : ModulePass(ID) {
-      initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
-    }
-  };
-}
-
-char ObjCARCAPElim::ID = 0;
-INITIALIZE_PASS(ObjCARCAPElim,
-                "objc-arc-apelim",
-                "ObjC ARC autorelease pool elimination",
-                false, false)
-
-Pass *llvm::createObjCARCAPElimPass() {
-  return new ObjCARCAPElim();
-}
-
-void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-}
-
-/// Interprocedurally determine if calls made by the given call site can
-/// possibly produce autoreleases.
-bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
-  if (const Function *Callee = CS.getCalledFunction()) {
-    if (Callee->isDeclaration() || Callee->mayBeOverridden())
-      return true;
-    for (Function::const_iterator I = Callee->begin(), E = Callee->end();
-         I != E; ++I) {
-      const BasicBlock *BB = I;
-      for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
-           J != F; ++J)
-        if (ImmutableCallSite JCS = ImmutableCallSite(J))
-          // This recursion depth limit is arbitrary. It's just great
-          // enough to cover known interesting testcases.
-          if (Depth < 3 &&
-              !JCS.onlyReadsMemory() &&
-              MayAutorelease(JCS, Depth + 1))
-            return true;
-    }
-    return false;
-  }
-
-  return true;
-}
-
-bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
-  bool Changed = false;
-
-  Instruction *Push = 0;
-  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
-    Instruction *Inst = I++;
-    switch (GetBasicInstructionClass(Inst)) {
-    case IC_AutoreleasepoolPush:
-      Push = Inst;
-      break;
-    case IC_AutoreleasepoolPop:
-      // If this pop matches a push and nothing in between can autorelease,
-      // zap the pair.
-      if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
-        Changed = true;
-        DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
-                        "autorelease pair:\n"
-                        " Pop: " << *Inst << "\n"
-                     << " Push: " << *Push << "\n");
-        Inst->eraseFromParent();
-        Push->eraseFromParent();
-      }
-      Push = 0;
-      break;
-    case IC_CallOrUser:
-      if (MayAutorelease(ImmutableCallSite(Inst)))
-        Push = 0;
-      break;
-    default:
-      break;
-    }
-  }
-
-  return Changed;
-}
-
-bool ObjCARCAPElim::runOnModule(Module &M) {
-  if (!EnableARCOpts)
-    return false;
-
-  // If nothing in the Module uses ARC, don't do anything.
-  if (!ModuleHasARC(M))
-    return false;
-
-  // Find the llvm.global_ctors variable, as the first step in
-  // identifying the global constructors. In theory, unnecessary autorelease
-  // pools could occur anywhere, but in practice it's pretty rare. Global
-  // ctors are a place where autorelease pools get inserted automatically,
-  // so it's pretty common for them to be unnecessary, and it's pretty
-  // profitable to eliminate them.
-  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
-  if (!GV)
-    return false;
-
-  assert(GV->hasDefinitiveInitializer() &&
-         "llvm.global_ctors is uncooperative!");
-
-  bool Changed = false;
-
-  // Dig the constructor functions out of GV's initializer.
-  ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
-  for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
-       OI != OE; ++OI) {
-    Value *Op = *OI;
-    // llvm.global_ctors is an array of pairs where the second members
-    // are constructor functions.
-    Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
-    // If the user used a constructor function with the wrong signature and
-    // it got bitcasted or whatever, look the other way.
-    if (!F)
-      continue;
-    // Only look at function definitions.
-    if (F->isDeclaration())
-      continue;
-    // Only look at functions with one basic block.
-    if (llvm::next(F->begin()) != F->end())
-      continue;
-    // Ok, a single-block constructor function definition. Try to optimize it.
-    Changed |= OptimizeBB(F->begin());
-  }
-
-  return Changed;
-}
-
-/// @}
-///
 /// \defgroup ARCOpt ARC Optimization.
 /// @{
-- cgit v1.1


From 53ff78b2019e96e142986d19dd99f8dd563dc494 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Mon, 28 Jan 2013 04:29:01 +0000
Subject: Revert r173646, "Use proper type for the index."

Unfortunately, msvc miscompiles it. Investigating.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173656 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Attributes.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)
(limited to 'lib')

diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 544c3cf..6745486 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -710,27 +710,27 @@ AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const {
   return pImpl->getSlotAttributes(Slot);
 }
 
-bool AttributeSet::hasAttribute(uint64_t Index, Attribute::AttrKind Kind) const{
+bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
   return getAttributes(Index).hasAttribute(Kind);
 }
 
-bool AttributeSet::hasAttributes(uint64_t Index) const {
+bool AttributeSet::hasAttributes(unsigned Index) const {
   return getAttributes(Index).hasAttributes();
 }
 
-std::string AttributeSet::getAsString(uint64_t Index) const {
+std::string AttributeSet::getAsString(unsigned Index) const {
   return getAttributes(Index).getAsString();
 }
 
-unsigned AttributeSet::getParamAlignment(uint64_t Idx) const {
+unsigned AttributeSet::getParamAlignment(unsigned Idx) const {
   return getAttributes(Idx).getAlignment();
 }
 
-unsigned AttributeSet::getStackAlignment(uint64_t Index) const {
+unsigned AttributeSet::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
 
-uint64_t AttributeSet::Raw(uint64_t Index) const {
+uint64_t AttributeSet::Raw(unsigned Index) const {
   // FIXME: Remove this.
   return pImpl ? pImpl->Raw(Index) : 0;
 }
@@ -738,7 +738,7 @@ uint64_t AttributeSet::Raw(uint64_t Index) const {
 /// \brief The attributes for the specified index are returned.
 ///
 /// FIXME: This shouldn't return 'Attribute'.
-Attribute AttributeSet::getAttributes(uint64_t Idx) const {
+Attribute AttributeSet::getAttributes(unsigned Idx) const {
   if (pImpl == 0) return Attribute();
 
   // Loop through to find the attribute we want.
-- cgit v1.1


From 49f6060f16aec4024d644a6ec4ddd3de9b3e8821 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Mon, 28 Jan 2013 05:23:28 +0000
Subject: Rewrite the addAttr() method.

This now uses the AttributeSet object instead of the Attribute /
AttributeWithIndex objects.

It's fairly simple now. It goes through all of the subsets before the
one we're modifying, adds them to the new set. It then adds the
modified subset. And then adds the rest of the subsets.
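In outline, the merge is a three-phase slot rebuild (condensed from the
diff below; the only subtle part is the bookkeeping around the slot
being modified):

    SmallVector<AttributeSet, 8> AttrSet;   // slots of the rebuilt set
    // 1. Copy the slots whose index precedes Idx, remembering where we
    //    stopped (LastIndex) and any existing set at Idx (AS).
    // 2. Merge the incoming attributes for slot Idx through an
    //    AttrBuilder, since attributes may already live at that index.
    AttrBuilder B(AS, Idx);
    AttrSet.push_back(AttributeSet::get(C, Idx, B));
    // 3. Append the remaining slots and rebuild the final set.
    return get(C, AttrSet);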
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173659 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 149 +++++++++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 73 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 6745486..f341e54 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -246,6 +246,39 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { return *this; } +AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) { + uint64_t Mask = Attr.Raw(); + + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) + if ((Mask & AttributeImpl::getAttrMask(I)) != 0) + Attrs.insert(I); + + if (Attr.getAlignment()) + Alignment = Attr.getAlignment(); + if (Attr.getStackAlignment()) + StackAlignment = Attr.getStackAlignment(); + return *this; +} + +AttrBuilder &AttrBuilder::removeAttributes(Attribute A) { + uint64_t Mask = A.Raw(); + + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) { + if (Mask & AttributeImpl::getAttrMask(I)) { + Attrs.erase(I); + + if (I == Attribute::Alignment) + Alignment = 0; + else if (I == Attribute::StackAlignment) + StackAlignment = 0; + } + } + + return *this; +} + AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { if (Align == 0) return *this; @@ -285,39 +318,6 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { return *this; } -AttrBuilder &AttrBuilder::addAttributes(const Attribute &Attr) { - uint64_t Mask = Attr.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) - if ((Mask & AttributeImpl::getAttrMask(I)) != 0) - Attrs.insert(I); - - if (Attr.getAlignment()) - Alignment = Attr.getAlignment(); - if (Attr.getStackAlignment()) - StackAlignment = Attr.getStackAlignment(); - return *this; -} - -AttrBuilder &AttrBuilder::removeAttributes(const Attribute &A){ - uint64_t Mask = A.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (Mask & AttributeImpl::getAttrMask(I)) { - Attrs.erase(I); - - if (I == Attribute::Alignment) - Alignment = 0; - else if (I == Attribute::StackAlignment) - StackAlignment = 0; - } - } - - return *this; -} - bool AttrBuilder::contains(Attribute::AttrKind A) const { return Attrs.count(A); } @@ -710,23 +710,23 @@ AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { return pImpl->getSlotAttributes(Slot); } -bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ +bool AttributeSet::hasAttribute(uint64_t Index, Attribute::AttrKind Kind) const{ return getAttributes(Index).hasAttribute(Kind); } -bool AttributeSet::hasAttributes(unsigned Index) const { +bool AttributeSet::hasAttributes(uint64_t Index) const { return getAttributes(Index).hasAttributes(); } -std::string AttributeSet::getAsString(unsigned Index) const { +std::string AttributeSet::getAsString(uint64_t Index) const { return getAttributes(Index).getAsString(); } -unsigned AttributeSet::getParamAlignment(unsigned Idx) const { +unsigned AttributeSet::getParamAlignment(uint64_t Idx) const { return getAttributes(Idx).getAlignment(); } -unsigned AttributeSet::getStackAlignment(unsigned Index) const { +unsigned AttributeSet::getStackAlignment(uint64_t Index) const { return getAttributes(Index).getStackAlignment(); } @@ -771,58 +771,61 @@ bool 
AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, Attribute::AttrKind Attr) const { - return addAttr(C, Idx, Attribute::get(C, Attr)); + return addAttr(C, Idx, AttributeSet::get(C, Idx, Attr)); } AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { - return addAttr(C, Idx, Attrs.getAttributes(Idx)); + return addAttr(C, Idx, Attrs); } AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, - Attribute Attrs) const { - Attribute OldAttrs = getAttributes(Idx); + AttributeSet Attrs) const { + if (!pImpl) return Attrs; + if (!Attrs.pImpl) return *this; + #ifndef NDEBUG - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't change a known alignment. - unsigned OldAlign = OldAttrs.getAlignment(); - unsigned NewAlign = Attrs.getAlignment(); + // FIXME it is not obvious how this should work for alignment. For now, say + // we can't change a known alignment. + unsigned OldAlign = getParamAlignment(Idx); + unsigned NewAlign = Attrs.getParamAlignment(Idx); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif - AttrBuilder NewAttrs = - AttrBuilder(OldAttrs).addAttributes(Attrs); - if (NewAttrs == AttrBuilder(OldAttrs)) - return *this; + // Add the attribute slots before the one we're trying to add. + SmallVector AttrSet; + uint64_t NumAttrs = pImpl->getNumAttributes(); + AttributeSet AS; + uint64_t LastIndex = 0; + for (unsigned I = 0, E = NumAttrs; I != E; ++I) { + if (getSlotIndex(I) >= Idx) { + if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++); + break; + } + LastIndex = I + 1; + AttrSet.push_back(getSlotAttributes(I)); + } - SmallVector NewAttrList; - if (pImpl == 0) { - NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); - } else { - ArrayRef OldAttrList = pImpl->getAttributes(); - unsigned i = 0, e = OldAttrList.size(); - - // Copy attributes for arguments before this one. - for (; i != e && OldAttrList[i].Index < Idx; ++i) - NewAttrList.push_back(OldAttrList[i]); - - // If there are attributes already at this index, merge them in. - if (i != e && OldAttrList[i].Index == Idx) { - Attrs = - Attribute::get(C, AttrBuilder(Attrs). - addAttributes(OldAttrList[i].Attrs)); - ++i; + // Now add the attribute into the correct slot. There may already be an + // AttributeSet there. + AttrBuilder B(AS, Idx); + + for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) + if (Attrs.getSlotIndex(I) == Idx) { + for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I), + IE = Attrs.pImpl->end(I); II != IE; ++II) + B.addAttributes(*II); + break; } - NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); + AttrSet.push_back(AttributeSet::get(C, Idx, B)); - // Copy attributes for arguments after this one. - NewAttrList.insert(NewAttrList.end(), - OldAttrList.begin()+i, OldAttrList.end()); - } + // Add the remaining attribute slots. + for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) + AttrSet.push_back(getSlotAttributes(I)); - return get(C, NewAttrList); + return get(C, AttrSet); } AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx, -- cgit v1.1 From 98b92f3bf5770e02498549e24b3db75d5862c173 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 05:44:14 +0000 Subject: Rewrite the removeAttr() method. This now uses the AttributeSet object instead of the Attribute / AttributeWithIndex objects. 
It's fairly simple now. It goes through all of the subsets before the one we're modifying, adds them to the new set. It then adds the modified subset (with the requested attributes removed). And then adds the rest of the subsets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173660 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 82 +++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index f341e54..67fab83 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -771,16 +771,11 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, Attribute::AttrKind Attr) const { - return addAttr(C, Idx, AttributeSet::get(C, Idx, Attr)); + return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); } AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { - return addAttr(C, Idx, Attrs); -} - -AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, - AttributeSet Attrs) const { if (!pImpl) return Attrs; if (!Attrs.pImpl) return *this; @@ -830,51 +825,54 @@ AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx, AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx, Attribute::AttrKind Attr) const { - return removeAttr(C, Idx, Attribute::get(C, Attr)); + return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); } AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { - return removeAttr(C, Idx, Attrs.getAttributes(Idx)); -} + if (!pImpl) return AttributeSet(); + if (!Attrs.pImpl) return *this; -AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx, - Attribute Attrs) const { #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Attribute::Alignment) && - "Attempt to exclude alignment!"); + assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) && + "Attempt to change alignment!"); #endif - if (pImpl == 0) return AttributeSet(); - - Attribute OldAttrs = getAttributes(Idx); - AttrBuilder NewAttrs = - AttrBuilder(OldAttrs).removeAttributes(Attrs); - if (NewAttrs == AttrBuilder(OldAttrs)) - return *this; - - SmallVector NewAttrList; - ArrayRef OldAttrList = pImpl->getAttributes(); - unsigned i = 0, e = OldAttrList.size(); - - // Copy attributes for arguments before this one. - for (; i != e && OldAttrList[i].Index < Idx; ++i) - NewAttrList.push_back(OldAttrList[i]); - - // If there are attributes already at this index, merge them in. - assert(OldAttrList[i].Index == Idx && "Attribute isn't set?"); - Attrs = Attribute::get(C, AttrBuilder(OldAttrList[i].Attrs). - removeAttributes(Attrs)); - ++i; - if (Attrs.hasAttributes()) // If any attributes left for this param, add them. - NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs)); - - // Copy attributes for arguments after this one. - NewAttrList.insert(NewAttrList.end(), - OldAttrList.begin()+i, OldAttrList.end()); - - return get(C, NewAttrList); + + // Add the attribute slots before the one we're trying to add. 
+ SmallVector AttrSet; + uint64_t NumAttrs = pImpl->getNumAttributes(); + AttributeSet AS; + uint64_t LastIndex = 0; + for (unsigned I = 0, E = NumAttrs; I != E; ++I) { + if (getSlotIndex(I) >= Idx) { + if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++); + break; + } + LastIndex = I + 1; + AttrSet.push_back(getSlotAttributes(I)); + } + + // Now add the attribute into the correct slot. There may already be an + // AttributeSet there. + AttrBuilder B(AS, Idx); + + for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) + if (Attrs.getSlotIndex(I) == Idx) { + for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I), + IE = Attrs.pImpl->end(I); II != IE; ++II) + B.removeAttributes(*II); + break; + } + + AttrSet.push_back(AttributeSet::get(C, Idx, B)); + + // Add the remaining attribute slots. + for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) + AttrSet.push_back(getSlotAttributes(I)); + + return get(C, AttrSet); } void AttributeSet::dump() const { -- cgit v1.1 From 19d815c04fde6b7b53c2b542813157edfa213842 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 05:51:40 +0000 Subject: Mid-air collision. reapply r173656. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173661 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 67fab83..3f0038b 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -710,23 +710,23 @@ AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { return pImpl->getSlotAttributes(Slot); } -bool AttributeSet::hasAttribute(uint64_t Index, Attribute::AttrKind Kind) const{ +bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ return getAttributes(Index).hasAttribute(Kind); } -bool AttributeSet::hasAttributes(uint64_t Index) const { +bool AttributeSet::hasAttributes(unsigned Index) const { return getAttributes(Index).hasAttributes(); } -std::string AttributeSet::getAsString(uint64_t Index) const { +std::string AttributeSet::getAsString(unsigned Index) const { return getAttributes(Index).getAsString(); } -unsigned AttributeSet::getParamAlignment(uint64_t Idx) const { +unsigned AttributeSet::getParamAlignment(unsigned Idx) const { return getAttributes(Idx).getAlignment(); } -unsigned AttributeSet::getStackAlignment(uint64_t Index) const { +unsigned AttributeSet::getStackAlignment(unsigned Index) const { return getAttributes(Index).getStackAlignment(); } -- cgit v1.1 From 6086847bfbc538b99305b4d7e0a53ab610f6a9bb Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 28 Jan 2013 05:51:54 +0000 Subject: Refactor ObjCARCAliasAnalysis into its own file. 
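
For illustration (not part of this patch): with the analysis in its own file, clients still reach it through the existing factory function. A minimal sketch, assuming the legacy PassManager of this era; the header paths and the driver function are this note's assumptions, not code from the tree:

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/ObjCARC.h"
    using namespace llvm;

    // Hypothetical driver: schedule the extracted alias analysis ahead of the
    // ARC optimizer that consumes its results.
    void runARCOptimizations(Module &M) {
      PassManager PM;
      PM.add(createObjCARCAliasAnalysisPass()); // joins the AliasAnalysis group
      PM.add(createObjCARCOptPass());           // queries it through that group
      PM.run(M);
    }
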
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173662 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/CMakeLists.txt | 1 + lib/Transforms/ObjCARC/ObjCARC.h | 116 +++++++++- lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 164 ++++++++++++++ lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 71 ++++++ lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 283 +----------------------- 5 files changed, 352 insertions(+), 283 deletions(-) create mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt index a0bdb04..b988611 100644 --- a/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMObjCARCOpts ObjCARCOpts.cpp ObjCARCExpand.cpp ObjCARCAPElim.cpp + ObjCARCAliasAnalysis.cpp ) add_dependencies(LLVMObjCARCOpts intrinsics_gen) diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index ac04cad..a6fdc28 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -26,6 +26,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -145,10 +146,81 @@ static raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) { llvm_unreachable("Unknown instruction class!"); } +/// \brief Test if the given class is objc_retain or equivalent. +static inline bool IsRetain(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV; +} + +/// \brief Test if the given class is objc_autorelease or equivalent. +static inline bool IsAutorelease(InstructionClass Class) { + return Class == IC_Autorelease || + Class == IC_AutoreleaseRV; +} + +/// \brief Test if the given class represents instructions which return their +/// argument verbatim. +static inline bool IsForwarding(InstructionClass Class) { + // objc_retainBlock technically doesn't always return its argument + // verbatim, but it doesn't matter for our purposes here. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock || + Class == IC_NoopCast; +} + +/// \brief Test if the given class represents instructions which do nothing if +/// passed a null pointer. +static inline bool IsNoopOnNull(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock; +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. +static inline bool IsAlwaysTail(InstructionClass Class) { + // IC_RetainBlock may be given a stack argument. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_AutoreleaseRV; +} + +/// \brief Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. 
+static inline bool IsNeverTail(InstructionClass Class) { + /// It is never safe to tail call objc_autorelease since by tail calling + /// objc_autorelease, we also tail call -[NSObject autorelease] which supports + /// fast autoreleasing causing our object to be potentially reclaimed from the + /// autorelease pool which violates the semantics of __autoreleasing types in + /// ARC. + return Class == IC_Autorelease; +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. +static inline bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_AutoreleasepoolPush || + Class == IC_AutoreleasepoolPop; +} /// \brief Determine if F is one of the special known Functions. If it isn't, /// return IC_CallOrUser. -static inline InstructionClass GetFunctionClass(const Function *F) { +static InstructionClass GetFunctionClass(const Function *F) + LLVM_ATTRIBUTE_USED; +static InstructionClass GetFunctionClass(const Function *F) { Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); // No arguments. @@ -236,6 +308,48 @@ static inline InstructionClass GetBasicInstructionClass(const Value *V) { return isa(V) ? IC_CallOrUser : IC_User; } + +/// \brief This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. +static inline const Value *GetUnderlyingObjCPtr(const Value *V) { + for (;;) { + V = GetUnderlyingObject(V); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast(V)->getArgOperand(0); + } + + return V; +} + +/// \brief This is a wrapper around Value::stripPointerCasts which also knows +/// how to look through objc_retain and objc_autorelease calls, which we know to +/// return their argument verbatim. +static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast(V)->getArgOperand(0); + } + return V; +} + +/// \brief This is a wrapper around Value::stripPointerCasts which also knows +/// how to look through objc_retain and objc_autorelease calls, which we know to +/// return their argument verbatim. +static inline Value *StripPointerCastsAndObjCCalls(Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast(V)->getArgOperand(0); + } + return V; +} + + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp new file mode 100644 index 0000000..be30112 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -0,0 +1,164 @@ +//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -*- mode: c++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-aa" +#include "ObjCARC.h" +#include "ObjCARCAliasAnalysis.h" + +#include "llvm/IR/Instruction.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" + +namespace llvm { + class Function; + class Value; +} + +using namespace llvm; +using namespace llvm::objcarc; + +// Register this pass... +char ObjCARCAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { + return new ObjCARCAliasAnalysis(); +} + +void +ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { + if (!EnableARCOpts) + return AliasAnalysis::alias(LocA, LocB); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making a + // precise alias query. + const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); + const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); + AliasResult Result = + AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), + Location(SB, LocB.Size, LocB.TBAATag)); + if (Result != MayAlias) + return Result; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *UA = GetUnderlyingObjCPtr(SA); + const Value *UB = GetUnderlyingObjCPtr(SB); + if (UA != SA || UB != SB) { + Result = AliasAnalysis::alias(Location(UA), Location(UB)); + // We can't use MustAlias or PartialAlias results here because + // GetUnderlyingObjCPtr may return an offsetted pointer value. + if (Result == NoAlias) + return NoAlias; + } + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return MayAlias; +} + +bool +ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); + if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), + OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) + return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); + + // If that failed, fail. 
We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // We have nothing to do. Just chain to the next AliasAnalysis. + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case IC_NoopCast: + return DoesNotAccessMemory; + default: + break; + } + + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefInfo(CS, Loc); + + switch (GetBasicInstructionClass(CS.getInstruction())) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_NoopCast: + case IC_AutoreleasepoolPush: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, because it updates + // pointers when it copies block data. + return NoModRef; + default: + break; + } + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // TODO: Theoretically we could check for dependencies between objc_* calls + // and OnlyAccessesArgumentPointees calls or other well-behaved calls. + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h new file mode 100644 index 0000000..d223b38 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -0,0 +1,71 @@ +//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- mode: c++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H +#define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H + +namespace llvm { +namespace objcarc { + + /// \brief This is a simple alias analysis implementation that uses knowledge + /// of ARC constructs to answer queries. + /// + /// TODO: This class could be generalized to know about other ObjC-specific + /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing + /// even though their offsets are dynamic. 
+ class ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + ObjCARCAliasAnalysis() : ImmutablePass(ID) { + initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + private: + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// This method is used when a pass implements an analysis interface through + /// multiple inheritance. If needed, it should override this to adjust the + /// this pointer as needed for the specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return static_cast(this); + return this; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; + +} // namespace objcarc +} // namespace llvm + +#endif // LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 900ad61..894f4ee 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "objc-arc-opts" #include "ObjCARC.h" +#include "ObjCARCAliasAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -131,7 +132,6 @@ namespace { /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. /// @{ -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/Local.h" @@ -260,76 +260,6 @@ static InstructionClass GetInstructionClass(const Value *V) { return IC_None; } -/// \brief Test if the given class is objc_retain or equivalent. -static bool IsRetain(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV; -} - -/// \brief Test if the given class is objc_autorelease or equivalent. -static bool IsAutorelease(InstructionClass Class) { - return Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// \brief Test if the given class represents instructions which return their -/// argument verbatim. -static bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || - Class == IC_NoopCast; -} - -/// \brief Test if the given class represents instructions which do nothing if -/// passed a null pointer. -static bool IsNoopOnNull(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock; -} - -/// \brief Test if the given class represents instructions which are always safe -/// to mark with the "tail" keyword. -static bool IsAlwaysTail(InstructionClass Class) { - // IC_RetainBlock may be given a stack argument. 
- return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_AutoreleaseRV; -} - -/// \brief Test if the given class represents instructions which are never safe -/// to mark with the "tail" keyword. -static bool IsNeverTail(InstructionClass Class) { - /// It is never safe to tail call objc_autorelease since by tail calling - /// objc_autorelease, we also tail call -[NSObject autorelease] which supports - /// fast autoreleasing causing our object to be potentially reclaimed from the - /// autorelease pool which violates the semantics of __autoreleasing types in - /// ARC. - return Class == IC_Autorelease; -} - -/// \brief Test if the given class represents instructions which are always safe -/// to mark with the nounwind attribute. -static bool IsNoThrow(InstructionClass Class) { - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_AutoreleasepoolPush || - Class == IC_AutoreleasepoolPop; -} - /// \brief Erase the given instruction. /// /// Many ObjC calls return their argument verbatim, @@ -354,46 +284,6 @@ static void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } -/// \brief This is a wrapper around getUnderlyingObject which also knows how to -/// look through objc_retain and objc_autorelease calls, which we know to return -/// their argument verbatim. -static const Value *GetUnderlyingObjCPtr(const Value *V) { - for (;;) { - V = GetUnderlyingObject(V); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast(V)->getArgOperand(0); - } - - return V; -} - -/// \brief This is a wrapper around Value::stripPointerCasts which also knows -/// how to look through objc_retain and objc_autorelease calls, which we know to -/// return their argument verbatim. -static const Value *StripPointerCastsAndObjCCalls(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast(V)->getArgOperand(0); - } - return V; -} - -/// \brief This is a wrapper around Value::stripPointerCasts which also knows -/// how to look through objc_retain and objc_autorelease calls, which we know to -/// return their argument verbatim. -static Value *StripPointerCastsAndObjCCalls(Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast(V)->getArgOperand(0); - } - return V; -} - /// \brief Assuming the given instruction is one of the special calls such as /// objc_retain or objc_release, return the argument value, stripped of no-op /// casts and forwarding calls. @@ -553,177 +443,6 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { /// @} /// -/// \defgroup ARCAA Extends alias analysis using ObjC specific knowledge. -/// @{ - -namespace { - /// \brief This is a simple alias analysis implementation that uses knowledge - /// of ARC constructs to answer queries. - /// - /// TODO: This class could be generalized to know about other ObjC-specific - /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing - /// even though their offsets are dynamic. 
- class ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - ObjCARCAliasAnalysis() : ImmutablePass(ID) { - initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - private: - virtual void initializePass() { - InitializeAliasAnalysis(this); - } - - /// This method is used when a pass implements an analysis interface through - /// multiple inheritance. If needed, it should override this to adjust the - /// this pointer as needed for the specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *PI) { - if (PI == &AliasAnalysis::ID) - return static_cast(this); - return this; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Location &LocA, const Location &LocB); - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); - virtual ModRefBehavior getModRefBehavior(const Function *F); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); - }; -} // End of anonymous namespace - -// Register this pass... -char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasAnalysis::AliasResult -ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { - if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making a - // precise alias query. - const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); - const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); - AliasResult Result = - AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), - Location(SB, LocB.Size, LocB.TBAATag)); - if (Result != MayAlias) - return Result; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); - if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(Location(UA), Location(UB)); - // We can't use MustAlias or PartialAlias results here because - // GetUnderlyingObjCPtr may return an offsetted pointer value. - if (Result == NoAlias) - return NoAlias; - } - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return MayAlias; -} - -bool -ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, - bool OrLocal) { - if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making - // a precise alias query. - const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), - OrLocal)) - return true; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. 
- const Value *U = GetUnderlyingObjCPtr(S); - if (U != S) - return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return false; -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. - return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); - - switch (GetFunctionClass(F)) { - case IC_NoopCast: - return DoesNotAccessMemory; - default: - break; - } - - return AliasAnalysis::getModRefBehavior(F); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); - - switch (GetBasicInstructionClass(CS.getInstruction())) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_NoopCast: - case IC_AutoreleasepoolPush: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These functions don't access any memory visible to the compiler. - // Note that this doesn't include objc_retainBlock, because it updates - // pointers when it copies block data. - return NoModRef; - default: - break; - } - - return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); -} - -/// @} -/// /// \defgroup ARCOpt ARC Optimization. /// @{ -- cgit v1.1 From 074ddd6f014b8b3488e53d22a961874230afb0d5 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 28 Jan 2013 05:51:58 +0000 Subject: Cleaned up includes in various ObjCARC files and removed some whitespace violations. 
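
For illustration (hypothetical names, not from the patch): the include cleanup leans on the usual forward-declaration idiom, where a file that only names a type by pointer or reference declares it instead of including its definition:

    // MyPass.h -- 'runMyPass' is a made-up example. A reference parameter
    // compiles against a forward declaration alone.
    namespace llvm {
      class Module; // instead of #include "llvm/IR/Module.h"
    }
    bool runMyPass(llvm::Module &M);

    // MyPass.cpp -- the full definition is included only where members are used.
    #include "llvm/IR/Module.h"
    bool runMyPass(llvm::Module &M) { return !M.empty(); }
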
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173663 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARC.cpp | 11 +++++------ lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 1 - lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 11 ++++++++++- lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 20 +++++++++++++++++++- 4 files changed, 34 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp index dceb567..b86dff1 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -14,15 +14,14 @@ //===----------------------------------------------------------------------===// #include "ObjCARC.h" - -#include "llvm-c/Initialization.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/IR/DataLayout.h" +#include "llvm-c/Core.h" #include "llvm/InitializePasses.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" +namespace llvm { + class PassRegistry; +} + using namespace llvm; using namespace llvm::objcarc; diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp index c849bcd..b8c51c2 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -176,4 +176,3 @@ bool ObjCARCAPElim::runOnModule(Module &M) { return Changed; } - diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp index be30112..9747ce0 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -21,8 +21,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "objc-arc-aa" + #include "ObjCARC.h" #include "ObjCARCAliasAnalysis.h" +#include "llvm/IR/Instruction.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" + +namespace llvm { + class Function; + class Value; +} #include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" @@ -161,4 +171,3 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, // and OnlyAccessesArgumentPointees calls or other well-behaved calls. return AliasAnalysis::getModRefInfo(CS1, CS2); } - diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp index ffc5405..633f988 100644 --- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp @@ -24,8 +24,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "objc-arc-expand" + #include "ObjCARC.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class Module; +} + using namespace llvm; using namespace llvm::objcarc; @@ -108,4 +127,3 @@ bool ObjCARCExpand::runOnFunction(Function &F) { return Changed; } - -- cgit v1.1 From 52dd806ed9c656c1b4fd073c8e086b563104f601 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jan 2013 06:09:24 +0000 Subject: Remove addToNoHelperNeeded function that was left unused after r173649. Fixes a -Wunused warning. 
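
For illustration, a made-up reduction (not the Mips code) of the warning class being fixed: once r173649 removed the last caller, the helper had internal linkage and no uses, which is exactly what -Wunused-function diagnoses:

    namespace {
      // warning: unused function 'addToSet' [-Wunused-function]
      const char *addToSet(const char *S) { return S; }
    } // removing the dead helper, as this commit does, silences the warning
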
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173664 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index de0fae7..8925dde 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -214,12 +214,6 @@ namespace { }; std::set noHelperNeeded; - - const char* addToNoHelperNeeded(const char* s) { - noHelperNeeded.insert(s); - return s; - } - } void MipsTargetLowering::SetMips16LibcallName -- cgit v1.1 From 64437ead05db4f5d28e697058180ae575c3cf75b Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 28 Jan 2013 06:39:31 +0000 Subject: Created ObjCARCUtil.cpp for functions which in my humble opinion are too large to static inline and place in a header file such as ObjCARC.h. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173666 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/CMakeLists.txt | 1 + lib/Transforms/ObjCARC/ObjCARC.h | 128 +--------------------------- lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 149 +++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 126 deletions(-) create mode 100644 lib/Transforms/ObjCARC/ObjCARCUtil.cpp (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt index b988611..e710044 100644 --- a/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMObjCARCOpts ObjCARCExpand.cpp ObjCARCAPElim.cpp ObjCARCAliasAnalysis.cpp + ObjCARCUtil.cpp ) add_dependencies(LLVMObjCARCOpts intrinsics_gen) diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index a6fdc28..854de74 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -91,60 +91,7 @@ enum InstructionClass { IC_None ///< anything else }; -static raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) - LLVM_ATTRIBUTE_USED; - -static raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class) { - switch (Class) { - case IC_Retain: - return OS << "IC_Retain"; - case IC_RetainRV: - return OS << "IC_RetainRV"; - case IC_RetainBlock: - return OS << "IC_RetainBlock"; - case IC_Release: - return OS << "IC_Release"; - case IC_Autorelease: - return OS << "IC_Autorelease"; - case IC_AutoreleaseRV: - return OS << "IC_AutoreleaseRV"; - case IC_AutoreleasepoolPush: - return OS << "IC_AutoreleasepoolPush"; - case IC_AutoreleasepoolPop: - return OS << "IC_AutoreleasepoolPop"; - case IC_NoopCast: - return OS << "IC_NoopCast"; - case IC_FusedRetainAutorelease: - return OS << "IC_FusedRetainAutorelease"; - case IC_FusedRetainAutoreleaseRV: - return OS << "IC_FusedRetainAutoreleaseRV"; - case IC_LoadWeakRetained: - return OS << "IC_LoadWeakRetained"; - case IC_StoreWeak: - return OS << "IC_StoreWeak"; - case IC_InitWeak: - return OS << "IC_InitWeak"; - case IC_LoadWeak: - return OS << "IC_LoadWeak"; - case IC_MoveWeak: - return OS << "IC_MoveWeak"; - case IC_CopyWeak: - return OS << "IC_CopyWeak"; - case IC_DestroyWeak: - return OS << "IC_DestroyWeak"; - case IC_StoreStrong: - return OS << "IC_StoreStrong"; - case IC_CallOrUser: - return OS << "IC_CallOrUser"; - case IC_Call: - return OS << "IC_Call"; - case IC_User: - return OS << "IC_User"; - case IC_None: - return OS << "IC_None"; - } - llvm_unreachable("Unknown instruction class!"); -} +raw_ostream 
&operator<<(raw_ostream &OS, const InstructionClass Class); /// \brief Test if the given class is objc_retain or equivalent. static inline bool IsRetain(InstructionClass Class) { @@ -218,78 +165,7 @@ static inline bool IsNoThrow(InstructionClass Class) { /// \brief Determine if F is one of the special known Functions. If it isn't, /// return IC_CallOrUser. -static InstructionClass GetFunctionClass(const Function *F) - LLVM_ATTRIBUTE_USED; -static InstructionClass GetFunctionClass(const Function *F) { - Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - - // No arguments. - if (AI == AE) - return StringSwitch(F->getName()) - .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) - .Default(IC_CallOrUser); - - // One argument. - const Argument *A0 = AI++; - if (AI == AE) - // Argument is a pointer. - if (PointerType *PTy = dyn_cast(A0->getType())) { - Type *ETy = PTy->getElementType(); - // Argument is i8*. - if (ETy->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_retain", IC_Retain) - .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) - .Case("objc_retainBlock", IC_RetainBlock) - .Case("objc_release", IC_Release) - .Case("objc_autorelease", IC_Autorelease) - .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) - .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) - .Case("objc_retainedObject", IC_NoopCast) - .Case("objc_unretainedObject", IC_NoopCast) - .Case("objc_unretainedPointer", IC_NoopCast) - .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) - .Default(IC_CallOrUser); - - // Argument is i8** - if (PointerType *Pte = dyn_cast(ETy)) - if (Pte->getElementType()->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_loadWeakRetained", IC_LoadWeakRetained) - .Case("objc_loadWeak", IC_LoadWeak) - .Case("objc_destroyWeak", IC_DestroyWeak) - .Default(IC_CallOrUser); - } - - // Two arguments, first is i8**. - const Argument *A1 = AI++; - if (AI == AE) - if (PointerType *PTy = dyn_cast(A0->getType())) - if (PointerType *Pte = dyn_cast(PTy->getElementType())) - if (Pte->getElementType()->isIntegerTy(8)) - if (PointerType *PTy1 = dyn_cast(A1->getType())) { - Type *ETy1 = PTy1->getElementType(); - // Second argument is i8* - if (ETy1->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_storeWeak", IC_StoreWeak) - .Case("objc_initWeak", IC_InitWeak) - .Case("objc_storeStrong", IC_StoreStrong) - .Default(IC_CallOrUser); - // Second argument is i8**. - if (PointerType *Pte1 = dyn_cast(ETy1)) - if (Pte1->getElementType()->isIntegerTy(8)) - return StringSwitch(F->getName()) - .Case("objc_moveWeak", IC_MoveWeak) - .Case("objc_copyWeak", IC_CopyWeak) - .Default(IC_CallOrUser); - } - - // Anything else. - return IC_CallOrUser; -} +InstructionClass GetFunctionClass(const Function *F); /// \brief Determine which objc runtime call instruction class V belongs to. /// diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp new file mode 100644 index 0000000..3192a6d --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -0,0 +1,149 @@ +//===- ObjCARCUtil.h - ObjC ARC Optimization ----------*- mode: c++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines several utility functions used by various ARC +/// optimizations which are IMHO too big to be in a header file. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#include "ObjCARC.h" + +using namespace llvm; +using namespace llvm::objcarc; + +raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, + const InstructionClass Class) { + switch (Class) { + case IC_Retain: + return OS << "IC_Retain"; + case IC_RetainRV: + return OS << "IC_RetainRV"; + case IC_RetainBlock: + return OS << "IC_RetainBlock"; + case IC_Release: + return OS << "IC_Release"; + case IC_Autorelease: + return OS << "IC_Autorelease"; + case IC_AutoreleaseRV: + return OS << "IC_AutoreleaseRV"; + case IC_AutoreleasepoolPush: + return OS << "IC_AutoreleasepoolPush"; + case IC_AutoreleasepoolPop: + return OS << "IC_AutoreleasepoolPop"; + case IC_NoopCast: + return OS << "IC_NoopCast"; + case IC_FusedRetainAutorelease: + return OS << "IC_FusedRetainAutorelease"; + case IC_FusedRetainAutoreleaseRV: + return OS << "IC_FusedRetainAutoreleaseRV"; + case IC_LoadWeakRetained: + return OS << "IC_LoadWeakRetained"; + case IC_StoreWeak: + return OS << "IC_StoreWeak"; + case IC_InitWeak: + return OS << "IC_InitWeak"; + case IC_LoadWeak: + return OS << "IC_LoadWeak"; + case IC_MoveWeak: + return OS << "IC_MoveWeak"; + case IC_CopyWeak: + return OS << "IC_CopyWeak"; + case IC_DestroyWeak: + return OS << "IC_DestroyWeak"; + case IC_StoreStrong: + return OS << "IC_StoreStrong"; + case IC_CallOrUser: + return OS << "IC_CallOrUser"; + case IC_Call: + return OS << "IC_Call"; + case IC_User: + return OS << "IC_User"; + case IC_None: + return OS << "IC_None"; + } + llvm_unreachable("Unknown instruction class!"); +} + +InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { + Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + + // No arguments. + if (AI == AE) + return StringSwitch(F->getName()) + .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) + .Default(IC_CallOrUser); + + // One argument. + const Argument *A0 = AI++; + if (AI == AE) + // Argument is a pointer. + if (PointerType *PTy = dyn_cast(A0->getType())) { + Type *ETy = PTy->getElementType(); + // Argument is i8*. 
+ if (ETy->isIntegerTy(8)) + return StringSwitch(F->getName()) + .Case("objc_retain", IC_Retain) + .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) + .Case("objc_retainBlock", IC_RetainBlock) + .Case("objc_release", IC_Release) + .Case("objc_autorelease", IC_Autorelease) + .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) + .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) + .Case("objc_retainedObject", IC_NoopCast) + .Case("objc_unretainedObject", IC_NoopCast) + .Case("objc_unretainedPointer", IC_NoopCast) + .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) + .Default(IC_CallOrUser); + + // Argument is i8** + if (PointerType *Pte = dyn_cast(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) + return StringSwitch(F->getName()) + .Case("objc_loadWeakRetained", IC_LoadWeakRetained) + .Case("objc_loadWeak", IC_LoadWeak) + .Case("objc_destroyWeak", IC_DestroyWeak) + .Default(IC_CallOrUser); + } + + // Two arguments, first is i8**. + const Argument *A1 = AI++; + if (AI == AE) + if (PointerType *PTy = dyn_cast(A0->getType())) + if (PointerType *Pte = dyn_cast(PTy->getElementType())) + if (Pte->getElementType()->isIntegerTy(8)) + if (PointerType *PTy1 = dyn_cast(A1->getType())) { + Type *ETy1 = PTy1->getElementType(); + // Second argument is i8* + if (ETy1->isIntegerTy(8)) + return StringSwitch(F->getName()) + .Case("objc_storeWeak", IC_StoreWeak) + .Case("objc_initWeak", IC_InitWeak) + .Case("objc_storeStrong", IC_StoreStrong) + .Default(IC_CallOrUser); + // Second argument is i8**. + if (PointerType *Pte1 = dyn_cast(ETy1)) + if (Pte1->getElementType()->isIntegerTy(8)) + return StringSwitch(F->getName()) + .Case("objc_moveWeak", IC_MoveWeak) + .Case("objc_copyWeak", IC_CopyWeak) + .Default(IC_CallOrUser); + } + + // Anything else. + return IC_CallOrUser; +} -- cgit v1.1 From 4aee1bb2223e59efb814a694edaecd07a3418da0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jan 2013 06:48:25 +0000 Subject: Fix inconsistent usage of PALIGN and PALIGNR when referring to the same instruction. 
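
As background (a scalar model written for this note, not code from the patch): the instruction both spellings refer to concatenates its two sources and extracts a byte-aligned window, per 128-bit lane, which is why the trailing R in PALIGNR is part of the mnemonic:

    #include <cstdint>
    #include <cstring>

    // Scalar sketch of one 128-bit PALIGNR: Dst = (Hi:Lo) >> (Imm bytes),
    // where Hi is the first source and Lo the second.
    void palignr16(uint8_t Dst[16], const uint8_t Hi[16], const uint8_t Lo[16],
                   unsigned Imm) {
      uint8_t Concat[32];
      std::memcpy(Concat, Lo, 16);      // second source: low bytes
      std::memcpy(Concat + 16, Hi, 16); // first source: high bytes
      for (unsigned i = 0; i != 16; ++i)
        Dst[i] = (Imm + i < 32) ? Concat[Imm + i] : 0; // bytes shifted out are 0
    }
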
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 12 ++++----- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 3 ++- lib/Target/X86/Utils/X86ShuffleDecode.h | 2 +- lib/Target/X86/X86ISelLowering.cpp | 14 +++++------ lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 +- lib/Target/X86/X86InstrSSE.td | 34 +++++++++++++------------- 7 files changed, 35 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index d8a45ea..43a8f0f 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -77,9 +77,9 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPALIGNR128rm: Src2Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodePALIGNMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePALIGNRMask(MVT::v16i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); break; case X86::VPALIGNR256rr: Src1Name = getRegName(MI->getOperand(2).getReg()); @@ -87,9 +87,9 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPALIGNR256rm: Src2Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); - DecodePALIGNMask(MVT::v32i8, - MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePALIGNRMask(MVT::v32i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); case X86::PSHUFDri: case X86::VPSHUFDri: diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 9694808..b490f27 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -61,7 +61,8 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { ShuffleMask.push_back(NElts+i); } -void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { +void DecodePALIGNRMask(MVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 69ce443..017ab32 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -35,7 +35,7 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); -void DecodePALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 76ec12c..a4eae0a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3004,7 +3004,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: @@ -3054,7 +3054,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, SelectionDAG &DAG) { switch(Opc) { default: 
llvm_unreachable("Unknown x86 shuffle node"); - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::SHUFP: case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, @@ -4592,9 +4592,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLHPS: DecodeMOVLHPSMask(NumElems, Mask); break; - case X86ISD::PALIGN: + case X86ISD::PALIGNR: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePALIGNMask(VT, cast(ImmN)->getZExtValue(), Mask); + DecodePALIGNRMask(VT, cast(ImmN)->getZExtValue(), Mask); break; case X86ISD::PSHUFD: case X86ISD::VPERMILP: @@ -6932,7 +6932,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // nodes, and remove one by one until they don't return Op anymore. if (isPALIGNRMask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, + return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, getShufflePALIGNRImmediate(SVOp), DAG); @@ -12435,7 +12435,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; - case X86ISD::PALIGN: return "X86ISD::PALIGN"; + case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; @@ -17416,7 +17416,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::UNPCKH: case X86ISD::UNPCKL: case X86ISD::MOVHLPS: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6d5e8c2..5e84e27 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -294,7 +294,7 @@ namespace llvm { TESTP, // Several flavors of instructions with vector shuffle behaviors. 
- PALIGN, + PALIGNR, PSHUFD, PSHUFHW, PSHUFLW, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7025e93..2a72fb6 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -160,7 +160,7 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; -def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; +def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 18c3dfe..0979752 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5167,7 +5167,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign { +multiclass ssse3_palignr { let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -5187,7 +5187,7 @@ multiclass ssse3_palign { } } -multiclass ssse3_palign_y { +multiclass ssse3_palignr_y { let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), @@ -5204,42 +5204,42 @@ multiclass ssse3_palign_y { } let Predicates = [HasAVX] in - defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; + defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in - defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L; + defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in - defm PALIGN : ssse3_palign<"palignr">; + defm PALIGN : ssse3_palignr<"palignr">; let Predicates = [HasAVX2] in { -def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; } let Predicates = [HasAVX] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, 
VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
-- cgit v1.1

From 53597b2c5cd0ace8683fd7aab33f8d40c085a49d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 28 Jan 2013 07:19:11 +0000
Subject: Add missing break in 256-bit palignr comment printing. No test case
 yet because the comment itself is still wrong.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173669 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/InstPrinter/X86InstComments.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 43a8f0f..865930a 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -90,6 +90,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     DecodePALIGNRMask(MVT::v32i8,
                       MI->getOperand(MI->getNumOperands()-1).getImm(),
                       ShuffleMask);
+    break;

   case X86::PSHUFDri:
   case X86::VPSHUFDri:
-- cgit v1.1

From 6ab4cbc98636eecafb12393463453b567d58e22c Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Mon, 28 Jan 2013 07:35:33 +0000
Subject: A bugfix for tblgen, in the function ‘emitSourceFileHeader’: when
 the first parameter (‘Desc’) is more than 80 characters long, it causes the
 header line that contains the description to become more than 4GB (!) long.
 Not only does it take forever to produce, but the output file cannot be
 opened, since it is ginormous.

Patch by Elior Malul.
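
For illustration, a reduction of the failure mode (distilled by hand from the old printLine; the simplified signature below is this note's, not LLVM's):

    #include <ostream>
    #include <string>

    // The old loop bounded itself with 'i != e' on unsigned values. When the
    // prefix (which embeds Desc) already extends past column 80 - Suffix.size(),
    // i starts out greater than e, so the loop terminates only after i wraps
    // around 2^32 -- emitting roughly 4GB of fill characters.
    static void printLineBuggy(std::ostream &OS, const std::string &Prefix,
                               char Fill, const std::string &Suffix) {
      OS << Prefix;
      for (unsigned i = Prefix.size(), e = 80 - Suffix.size(); i != e; ++i)
        OS << Fill;
      OS << Suffix << '\n';
    }
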
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173672 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TableGenBackend.cpp | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
index 7c8367a..79d5677 100644
--- a/lib/TableGen/TableGenBackend.cpp
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -14,13 +14,20 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include
+
 using namespace llvm;

+const size_t MAX_LINE_LEN = 80U;
+
 static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
                       StringRef Suffix) {
-  uint64_t Pos = OS.tell();
+  size_t Pos = (size_t)OS.tell();
+  assert((MAX_LINE_LEN - Prefix.str().size() - Suffix.size() > 0) &&
+         "header line exceeds max limit");
   OS << Prefix;
-  for (unsigned i = OS.tell() - Pos, e = 80 - Suffix.size(); i != e; ++i)
+  const size_t e = MAX_LINE_LEN - Suffix.size();
+  for (size_t i = (size_t)OS.tell() - Pos; i < e; ++i)
     OS << Fill;
   OS << Suffix << '\n';
 }
@@ -28,10 +35,22 @@ static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
 void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) {
   printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\");
   printLine(OS, "|*", ' ', "*|");
-  printLine(OS, "|* " + Desc, ' ', "*|");
-  printLine(OS, "|*", ' ', "*|");
-  printLine(OS, "|* Automatically generated file, do not edit!", ' ', "*|");
-  printLine(OS, "|*", ' ', "*|");
+  size_t Pos = 0U;
+  size_t PosE;
+  StringRef Prefix("|*");
+  StringRef Suffix(" *|");
+  do{
+    size_t PSLen = Suffix.size() + Prefix.size();
+    PosE = Pos + ((MAX_LINE_LEN > (Desc.size() - PSLen)) ?
+      Desc.size() :
+      MAX_LINE_LEN - PSLen);
+    printLine(OS, Prefix + Desc.slice(Pos, PosE), ' ', Suffix);
+    Pos = PosE;
+  } while(Pos < Desc.size());
+  printLine(OS, Prefix, ' ', Suffix);
+  printLine(OS, Prefix + " Automatically generated file, do not edit!", ' ',
+            Suffix);
+  printLine(OS, Prefix, ' ', Suffix);
   printLine(OS, "\\*===", '-', "===*/");
   OS << '\n';
 }
-- cgit v1.1

From 467016e58d57021b14f2ae562d221f00b07cb254 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 28 Jan 2013 07:41:18 +0000
Subject: Fix 256-bit PALIGNR comment decoding to understand that it works on
 independent 128-bit lanes.
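
For illustration, a hand-worked check of the lane-aware decode added below (a standalone model, not the LLVM function): VPALIGNR shifts each 128-bit lane separately, so for v32i8 with Imm = 2 the mask must restart at the 16-element lane boundary:

    #include <cstdio>

    int main() {
      const unsigned NumElts = 32, NumLaneElts = 16, Offset = 2; // v32i8, Imm = 2
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; ++i) {
          unsigned Base = i + Offset;
          if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; // other source
          std::printf("%u ", Base + l);
        }
      // prints 2..15 32 33, then 18..31 48 49; the old "(i + Offset) % 64" form
      // produced 2..33 instead, letting lane 0 read bytes that belong to lane 1.
      return 0;
    }
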
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index b490f27..bbd4904 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -66,8 +66,17 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, unsigned NumElts = VT.getVectorNumElements(); unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); - for (unsigned i = 0; i != NumElts; ++i) - ShuffleMask.push_back((i + Offset) % (NumElts * 2)); + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Base = i + Offset; + // if i+offset is out of this lane then we actually need the other source + if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; + ShuffleMask.push_back(Base + l); + } + } } /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. -- cgit v1.1 From 2f36a169a5fa0d31ce3b48f81f48c8b4963d8d0c Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 28 Jan 2013 09:15:15 +0000 Subject: [msan] Make msan-handle-icmp-exact=0 by default. 50% slowdown on one of the specs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173678 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index a329dcc..e92892d 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -129,7 +129,7 @@ static cl::opt<bool> ClHandleICmp("msan-handle-icmp", static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact", cl::desc("exact handling of relational integer ICmp"), - cl::Hidden, cl::init(true)); + cl::Hidden, cl::init(false)); static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin", cl::desc("store origin for clean (fully initialized) values"), -- cgit v1.1 From ccfc295b9d8f895432910a140d20446b5bafc4c4 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 28 Jan 2013 09:18:40 +0000 Subject: Revert r173678. Broken tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173679 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e92892d..a329dcc 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -129,7 +129,7 @@ static cl::opt<bool> ClHandleICmp("msan-handle-icmp", static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact", cl::desc("exact handling of relational integer ICmp"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin", cl::desc("store origin for clean (fully initialized) values"), -- cgit v1.1 From 647c66e24dc913db8e3e038d2fe6351bd98941a2 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 28 Jan 2013 11:42:28 +0000 Subject: [msan] Mostly disable msan-handle-icmp-exact. It is way too slow. Change the default option value to 0. Always do exact shadow propagation for unsigned ICmp with constants, it is cheap (under 1% cpu time) and required for correctness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173682 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 30 +++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index a329dcc..714972e 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -129,7 +129,7 @@ static cl::opt<bool> ClHandleICmp("msan-handle-icmp", static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact", cl::desc("exact handling of relational integer ICmp"), - cl::Hidden, cl::init(true)); + cl::Hidden, cl::init(false)); static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin", cl::desc("store origin for clean (fully initialized) values"), @@ -1255,14 +1255,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void visitICmpInst(ICmpInst &I) { - if (ClHandleICmp && I.isEquality()) + if (!ClHandleICmp) { + handleShadowOr(I); + return; + } + if (I.isEquality()) { handleEqualityComparison(I); - else if (ClHandleICmp && ClHandleICmpExact && I.isRelational()) + return; + } + + assert(I.isRelational()); + if (ClHandleICmpExact) { handleRelationalComparisonExact(I); - else if (ClHandleICmp && I.isSigned() && I.isRelational()) + return; + } + if (I.isSigned()) { handleSignedRelationalComparison(I); - else - handleShadowOr(I); + return; + } + + assert(I.isUnsigned()); + if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) { + handleRelationalComparisonExact(I); + return; + } + + handleShadowOr(I); } void visitFCmpInst(FCmpInst &I) { -- cgit v1.1 From 237f09db096f88b8bdaf7ef51fad480c52af5e50 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Mon, 28 Jan 2013 16:02:45 +0000 Subject: Vectorization Factor clarification git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173691 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 41 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e260b58..339f40b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -516,14 +516,18 @@ public: const TargetTransformInfo &TTI) : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {} + /// Information about vectorization costs + struct VectorizationFactor { + unsigned Width; // Vector width with best cost + unsigned Cost; // Cost of the loop with that width + }; /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to VF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - std::pair<unsigned, unsigned> - selectVectorizationFactor(bool OptForSize, unsigned UserVF); + VectorizationFactor selectVectorizationFactor(bool OptForSize, unsigned UserVF); - /// \returns The size (in bits) of the widest type in the code that + /// \return The size (in bits) of the widest type in the code that /// needs to be vectorized. We ignore values that remain scalar such as /// 64 bit loop indices. unsigned getWidestType(); @@ -633,24 +637,23 @@ struct LoopVectorize : public LoopPass { } // Select the optimal vectorization factor. - std::pair<unsigned, unsigned> VFPair; - VFPair = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); + LoopVectorizationCostModel::VectorizationFactor VF; + VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); // Select the unroll factor. unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll, - VFPair.first, VFPair.second); - unsigned VF = VFPair.first; + VF.Width, VF.Cost); - if (VF == 1) { + if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); return false; } - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<< + DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<< F->getParent()->getModuleIdentifier()<<"\n"); DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF, UF); + InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -2675,12 +2678,14 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { return AR->isAffine(); } -std::pair<unsigned, unsigned> +LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, unsigned UserVF) { + // Width 1 means no vectorize + VectorizationFactor Factor = { 1U, 0U }; if (OptForSize && Legal->getRuntimePointerCheck()->Need) { DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); - return std::make_pair(1U, 0U); + return Factor; } // Find the trip count. @@ -2708,7 +2713,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // If we are unable to calculate the trip count then don't try to vectorize. if (TC < 2) { DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); - return std::make_pair(1U, 0U); + return Factor; } // Find the maximum SIMD width that can fit within the trip count. @@ -2721,7 +2726,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // zero then we require a tail. if (VF < 2) { DEBUG(dbgs() << "LV: Aborting. 
A tail loop is required in Os.\n"); - return std::make_pair(1U, 0U); + return Factor; } } @@ -2729,7 +2734,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); -- cgit v1.1 Date: Mon, 28 Jan 2013 17:33:26 +0000 Subject: Fix comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173698 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index de0b2b0..f3c7a2a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1191,7 +1191,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, continue; } - // handle multiple DBG_VALUE instructions describing one variable. + // Handle multiple DBG_VALUE instructions describing one variable. RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); for (SmallVectorImpl<const MachineInstr*>::const_iterator -- cgit v1.1 From 5ff776bfde2dd5d993e51f8f78904ce331b5528c Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 28 Jan 2013 18:36:58 +0000 Subject: This patch addresses bug 15031. The common code in the post-RA scheduler to break anti-dependencies on the critical path contained a flaw. In the reported case, an anti-dependency between the overlapping registers %X4 and %R4 exists: %X29 = OR8 %X4, %X4 %R4, %X3 = LBZU 1, %X3 The unpatched code breaks the dependency by replacing %R4 and its uses with %R3, the first register on the available list. However, %R3 and %X3 overlap, so this creates two overlapping definitions on the same instruction. The fix is straightforward, preventing selection of a register that overlaps any other defined register on the same instruction. The test case is reduced from the bug report, and verifies that we no longer produce "lbzu 3, 1(3)" when breaking this anti-dependency. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CriticalAntiDepBreaker.cpp | 32 ++++++++++++++++++++++++-------- lib/CodeGen/CriticalAntiDepBreaker.h | 3 ++- 2 files changed, 26 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 48105d9..ee31dde 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -371,12 +371,13 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, return false; } -unsigned -CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, - RegRefIter RegRefEnd, - unsigned AntiDepReg, - unsigned LastNewReg, - const TargetRegisterClass *RC) +unsigned CriticalAntiDepBreaker:: +findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVector<unsigned, 2> &Forbid) { ArrayRef Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -401,6 +402,15 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || KillIndices[AntiDepReg] > DefIndices[NewReg]) continue; + // If NewReg overlaps any of the forbidden registers, we can't use it.
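+ // (Editorial note, not in the original patch: Forbid is filled by the + // caller with every other register the instruction defines, so this scan + // is what rejects, e.g., %R3 while %X3 is defined by the same LBZU.)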
+ bool Forbidden = false; + for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + ite = Forbid.end(); it != ite; ++it) + if (TRI->regsOverlap(NewReg, *it)) { + Forbidden = true; + break; + } + if (Forbidden) continue; return NewReg; } @@ -564,6 +574,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, PrescanInstruction(MI); + SmallVector<unsigned, 2> ForbidRegs; + // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). @@ -574,7 +586,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it - // is invalid. + // is invalid. If the instruction defines other registers, + // save a list of them so that we don't pick a new register + // that overlaps any of them. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -584,6 +598,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, AntiDepReg = 0; break; } + if (MO.isDef() && Reg != AntiDepReg) + ForbidRegs.push_back(Reg); } } @@ -606,7 +622,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, AntiDepReg, LastNewReg[AntiDepReg], - RC)) { + RC, ForbidRegs)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 8fb2b0e..df13dd3 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -102,7 +102,8 @@ class TargetRegisterInfo; RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, - const TargetRegisterClass *RC); + const TargetRegisterClass *RC, + SmallVector<unsigned, 2> &Forbid); }; } -- cgit v1.1 From 3c1c042a64e0d0d01c0e2817aa1f0f5c9c726a80 Mon Sep 17 00:00:00 2001 From: Edwin Vane Date: Mon, 28 Jan 2013 19:34:42 +0000 Subject: Fix gcc/printf/ISO C++ warning Remove the use of the 't' length modifier to avoid a gcc warning. Based on usage, 32 bits of precision is good enough for printing a stack offset for a stack trace. The 't' length modifier isn't in C++03 but it *is* in C++11. Added a FIXME to reintroduce it once LLVM makes the switch to C++11. Reviewer: gribozavr git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173711 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Signals.inc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 87162d6..af9e739 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -295,7 +295,11 @@ void llvm::sys::PrintStackTrace(FILE *FD) { else fputs(d, FD); free(d); - fprintf(FD, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr); + // FIXME: When we move to C++11, use %t length modifier. It's not in + // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of + // the stack offset for a stack dump isn't likely to cause any problems. + fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]- + (char*)dlinfo.dli_saddr)); } fputc('\n', FD); } -- cgit v1.1 From 710cb0c7826c61b432e19d5899a2f26f76d7aa81 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Mon, 28 Jan 2013 19:52:37 +0000 Subject: Add support for source and line information to IntelJITEventListener for objects emitted by MCJIT.
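In outline (an editorial sketch with simplified stand-in types, not the Intel JIT profiling API itself), the conversion turns each absolute address/line pair from the DWARF line table into a function-relative record:

#include <stdint.h>
#include <cstddef>
#include <utility>
#include <vector>

struct JITLineRec { unsigned Offset; unsigned LineNumber; };

// Convert (absolute address, source line) rows into offsets from the
// function start, the shape a JIT profiling line table expects.
static std::vector<JITLineRec>
toJITLineTable(uintptr_t FnStart,
               const std::vector<std::pair<uintptr_t, unsigned> > &Rows) {
  std::vector<JITLineRec> Out;
  for (std::size_t I = 0; I != Rows.size(); ++I) {
    JITLineRec R;
    R.Offset = static_cast<unsigned>(Rows[I].first - FnStart);
    R.LineNumber = Rows[I].second;
    Out.push_back(R);
  }
  return Out;
}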
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173712 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../IntelJITEvents/IntelJITEventListener.cpp | 53 ++++++++++++++++++---- 1 file changed, 45 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 0f99596..3645a4d 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -22,7 +22,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" @@ -78,6 +80,18 @@ static LineNumberInfo LineStartToIntelJITFormat( return Result; } +static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress, + uintptr_t Address, + DILineInfo Line) +{ + LineNumberInfo Result; + + Result.Offset = Address - StartAddress; + Result.LineNumber = Line.getLine(); + + return Result; +} + static iJIT_Method_Load FunctionDescToIntelJITFormat( IntelJITEventsWrapper& Wrapper, const char* FnName, @@ -177,6 +191,7 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { // Get the address of the object image for use as a unique identifier const void* ObjData = Obj.getData().data(); + DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile()); MethodAddressVector Functions; // Use symbol info to iterate functions in the object. @@ -185,12 +200,15 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { E = Obj.end_symbols(); I != E && !ec; I.increment(ec)) { + std::vector<LineNumberInfo> LineInfo; + std::string SourceFileName; + object::SymbolRef::Type SymType; if (I->getType(SymType)) continue; if (SymType == object::SymbolRef::ST_Function) { - StringRef Name; - uint64_t Addr; - uint64_t Size; + StringRef Name; + uint64_t Addr; + uint64_t Size; if (I->getName(Name)) continue; if (I->getAddress(Addr)) continue; if (I->getSize(Size)) continue; @@ -203,11 +221,30 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { Name.data(), Addr, Size); - - // FIXME: Try to find line info for this function in the DWARF sections. 
- FunctionMessage.source_file_name = 0; - FunctionMessage.line_number_size = 0; - FunctionMessage.line_number_table = 0; + if (Context) { + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) { + LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr, + It->first, + It->second)); + } + if (LineInfo.size() == 0) { + FunctionMessage.source_file_name = 0; + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } else { + SourceFileName = Lines.front().second.getFileName(); + FunctionMessage.source_file_name = (char *)SourceFileName.c_str(); + FunctionMessage.line_number_size = LineInfo.size(); + FunctionMessage.line_number_table = &*LineInfo.begin(); + } + } else { + FunctionMessage.source_file_name = 0; + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &FunctionMessage); -- cgit v1.1 From 87e10dfefa94f77937c37b0eb51095540d675cbc Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 21:55:20 +0000 Subject: Remove the AttributeWithIndex class. The AttributeWithIndex class exposed the interior structure of the AttributeSet class. That was gross. Remove it and all of the code that relied upon it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173722 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 19 ++----- lib/IR/Attributes.cpp | 146 +++++++++++++++++++++++-------------------------- 2 files changed, 73 insertions(+), 92 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 8d5de77..457b6ab 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -115,7 +115,6 @@ class AttributeSetImpl : public FoldingSetNode { friend class AttributeSet; LLVMContext &Context; - SmallVector AttrList; typedef std::pair IndexAttrPair; SmallVector AttrNodes; @@ -124,13 +123,13 @@ class AttributeSetImpl : public FoldingSetNode { void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; public: - AttributeSetImpl(LLVMContext &C, ArrayRef attrs); + AttributeSetImpl(LLVMContext &C, + ArrayRef > attrs) + : Context(C), AttrNodes(attrs.begin(), attrs.end()) {} /// \brief Get the context that created this AttributeSetImpl. LLVMContext &getContext() { return Context; } - ArrayRef getAttributes() const { return AttrList; } - /// \brief Return the number of attributes this AttributeSet contains. unsigned getNumAttributes() const { return AttrNodes.size(); } @@ -147,7 +146,7 @@ public: /// parameter/ function which the attributes apply to. AttributeSet getSlotAttributes(unsigned Slot) const { // FIXME: This needs to use AttrNodes instead. 
- return AttributeSet::get(Context, AttrList[Slot]); + return AttributeSet::get(Context, AttrNodes[Slot]); } typedef AttributeSetNode::iterator iterator; @@ -164,16 +163,8 @@ public: { return AttrNodes[Idx].second->end(); } void Profile(FoldingSetNodeID &ID) const { - Profile(ID, AttrList); - } - static void Profile(FoldingSetNodeID &ID, - ArrayRef AttrList) { - for (unsigned i = 0, e = AttrList.size(); i != e; ++i) { - ID.AddInteger(AttrList[i].Index); - ID.AddInteger(AttrList[i].Attrs.Raw()); - } + Profile(ID, AttrNodes); } - static void Profile(FoldingSetNodeID &ID, ArrayRef > Nodes) { for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 3f0038b..1ac66d5 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -1,4 +1,4 @@ -//===-- Attribute.cpp - Implement AttributesList -------------------------===// +//===-- Attributes.cpp - Implement AttributesList -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the Attribute, AttributeImpl, AttrBuilder, +// \file +// \brief This file implements the Attribute, AttributeImpl, AttrBuilder, // AttributeSetImpl, and AttributeSet classes. // //===----------------------------------------------------------------------===// @@ -540,44 +541,6 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, // AttributeSetImpl Definition //===----------------------------------------------------------------------===// -AttributeSetImpl:: -AttributeSetImpl(LLVMContext &C, - ArrayRef attrs) - : Context(C), AttrList(attrs.begin(), attrs.end()) { - for (unsigned I = 0, E = attrs.size(); I != E; ++I) { - const AttributeWithIndex &AWI = attrs[I]; - uint64_t Mask = AWI.Attrs.Raw(); - SmallVector Attrs; - - for (Attribute::AttrKind II = Attribute::None; - II != Attribute::EndAttrKinds; II = Attribute::AttrKind(II + 1)) { - if (uint64_t A = (Mask & AttributeImpl::getAttrMask(II))) { - AttrBuilder B; - - if (II == Attribute::Alignment) - B.addAlignmentAttr(1ULL << ((A >> 16) - 1)); - else if (II == Attribute::StackAlignment) - B.addStackAlignmentAttr(1ULL << ((A >> 26) - 1)); - else - B.addAttribute(II); - - Attrs.push_back(Attribute::get(C, B)); - } - } - - AttrNodes.push_back(std::make_pair(AWI.Index, - AttributeSetNode::get(C, Attrs))); - } - - assert(AttrNodes.size() == AttrList.size() && - "Number of attributes is different between lists!"); -#ifndef NDEBUG - for (unsigned I = 0, E = AttrNodes.size(); I != E; ++I) - assert((I == 0 || AttrNodes[I - 1].first < AttrNodes[I].first) && - "Attributes not in ascending order!"); -#endif -} - uint64_t AttributeSetImpl::Raw(uint64_t Index) const { for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) { if (getSlotIndex(I) != Index) continue; @@ -587,9 +550,6 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const { for (AttributeSetNode::const_iterator II = ASN->begin(), IE = ASN->end(); II != IE; ++II) B.addAttributes(*II); - - assert(B.Raw() == AttrList[I].Attrs.Raw() && - "Attributes aren't the same!"); return B.Raw(); } @@ -604,7 +564,8 @@ AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { // FIXME: Remove. return pImpl && hasAttributes(Idx) ? 
AttributeSet::get(pImpl->getContext(), - AttributeWithIndex::get(Idx, getAttributes(Idx))) : + ArrayRef >( + std::make_pair(Idx, getAttributes(Idx)))) : AttributeSet(); } @@ -612,8 +573,9 @@ AttributeSet AttributeSet::getRetAttributes() const { // FIXME: Remove. return pImpl && hasAttributes(ReturnIndex) ? AttributeSet::get(pImpl->getContext(), - AttributeWithIndex::get(ReturnIndex, - getAttributes(ReturnIndex))) : + ArrayRef >( + std::make_pair(ReturnIndex, + getAttributes(ReturnIndex)))) : AttributeSet(); } @@ -621,27 +583,15 @@ AttributeSet AttributeSet::getFnAttributes() const { // FIXME: Remove. return pImpl && hasAttributes(FunctionIndex) ? AttributeSet::get(pImpl->getContext(), - AttributeWithIndex::get(FunctionIndex, - getAttributes(FunctionIndex))) : + ArrayRef >( + std::make_pair(FunctionIndex, + getAttributes(FunctionIndex)))) : AttributeSet(); } -AttributeSet AttributeSet::get(LLVMContext &C, - ArrayRef Attrs) { - // If there are no attributes then return a null AttributesList pointer. - if (Attrs.empty()) - return AttributeSet(); - -#ifndef NDEBUG - for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - assert(Attrs[i].Attrs.hasAttributes() && - "Pointless attribute!"); - assert((!i || Attrs[i-1].Index < Attrs[i].Index) && - "Misordered AttributesList!"); - } -#endif - - // Otherwise, build a key to look up the existing attributes. +AttributeSet AttributeSet::getImpl(LLVMContext &C, + ArrayRef > Attrs) { LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; AttributeSetImpl::Profile(ID, Attrs); @@ -660,35 +610,75 @@ AttributeSet AttributeSet::get(LLVMContext &C, return AttributeSet(PA); } +AttributeSet AttributeSet::get(LLVMContext &C, + ArrayRef > Attrs){ + // If there are no attributes then return a null AttributesList pointer. + if (Attrs.empty()) + return AttributeSet(); + +#ifndef NDEBUG + for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { + assert((!i || Attrs[i-1].first <= Attrs[i].first) && + "Misordered Attributes list!"); + assert(Attrs[i].second.hasAttributes() && + "Pointless attribute!"); + } +#endif + + // Create a vector if (uint64_t, AttributeSetNode*) pairs from the attributes + // list. + SmallVector, 8> AttrPairVec; + for (ArrayRef >::iterator I = Attrs.begin(), + E = Attrs.end(); I != E; ) { + uint64_t Index = I->first; + SmallVector AttrVec; + while (I->first == Index && I != E) { + AttrVec.push_back(I->second); + ++I; + } + + AttrPairVec.push_back(std::make_pair(Index, + AttributeSetNode::get(C, AttrVec))); + } + + return getImpl(C, AttrPairVec); +} + +AttributeSet AttributeSet::get(LLVMContext &C, + ArrayRef > Attrs) { + // If there are no attributes then return a null AttributesList pointer. + if (Attrs.empty()) + return AttributeSet(); + + return getImpl(C, Attrs); +} + AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { - // FIXME: This should be implemented as a loop that creates the - // AttributeWithIndexes that then are used to create the AttributeSet. if (!B.hasAttributes()) return AttributeSet(); - return get(C, AttributeWithIndex::get(Idx, Attribute::get(C, B))); + return get(C, ArrayRef >( + std::make_pair(Idx, Attribute::get(C, B)))); } AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, ArrayRef Kind) { - // FIXME: This is temporary. Ultimately, the AttributeWithIndex will be - // replaced by an object that holds multiple Attribute::AttrKinds. 
- AttrBuilder B; + SmallVector, 8> Attrs; for (ArrayRef::iterator I = Kind.begin(), E = Kind.end(); I != E; ++I) - B.addAttribute(*I); - return get(C, Idx, B); + Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I))); + return get(C, Attrs); } AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { - SmallVector AttrList; - for (ArrayRef::iterator I = Attrs.begin(), E = Attrs.end(); - I != E; ++I) { - AttributeSet AS = *I; + SmallVector, 8> AttrNodeVec; + for (unsigned I = 0, E = Attrs.size(); I != E; ++I) { + AttributeSet AS = Attrs[I]; if (!AS.pImpl) continue; - AttrList.append(AS.pImpl->AttrList.begin(), AS.pImpl->AttrList.end()); + AttrNodeVec.append(AS.pImpl->AttrNodes.begin(), AS.pImpl->AttrNodes.end()); } - return get(C, AttrList); + return get(C, AttrNodeVec); } /// \brief Return the number of slots used in this attribute list. This is the -- cgit v1.1 From 6bdbf061c353295669b6bfc271b948158602d1bc Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 28 Jan 2013 22:33:39 +0000 Subject: Try to appease some broken compilers by using 'unsigned' instead of 'uint64_t'. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173725 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 6 +++--- lib/IR/Attributes.cpp | 33 +++++++++++++++------------------ 2 files changed, 18 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 457b6ab..c00f094 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -116,7 +116,7 @@ class AttributeSetImpl : public FoldingSetNode { LLVMContext &Context; - typedef std::pair IndexAttrPair; + typedef std::pair IndexAttrPair; SmallVector AttrNodes; // AttributesSet is uniqued, these should not be publicly available. @@ -124,7 +124,7 @@ class AttributeSetImpl : public FoldingSetNode { AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; public: AttributeSetImpl(LLVMContext &C, - ArrayRef > attrs) + ArrayRef > attrs) : Context(C), AttrNodes(attrs.begin(), attrs.end()) {} /// \brief Get the context that created this AttributeSetImpl. @@ -166,7 +166,7 @@ public: Profile(ID, AttrNodes); } static void Profile(FoldingSetNodeID &ID, - ArrayRef > Nodes) { + ArrayRef > Nodes) { for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { ID.AddInteger(Nodes[i].first); ID.AddPointer(Nodes[i].second); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 1ac66d5..ac394b7 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -31,12 +31,9 @@ using namespace llvm; // Attribute Implementation //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, ArrayRef Kinds) { +Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { AttrBuilder B; - for (ArrayRef::iterator I = Kinds.begin(), E = Kinds.end(); - I != E; ++I) - B.addAttribute(*I); - return Attribute::get(Context, B); + return Attribute::get(Context, B.addAttribute(Kind)); } Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { @@ -564,7 +561,7 @@ AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { // FIXME: Remove. return pImpl && hasAttributes(Idx) ? AttributeSet::get(pImpl->getContext(), - ArrayRef >( + ArrayRef >( std::make_pair(Idx, getAttributes(Idx)))) : AttributeSet(); } @@ -573,7 +570,7 @@ AttributeSet AttributeSet::getRetAttributes() const { // FIXME: Remove. return pImpl && hasAttributes(ReturnIndex) ? 
AttributeSet::get(pImpl->getContext(), - ArrayRef >( + ArrayRef >( std::make_pair(ReturnIndex, getAttributes(ReturnIndex)))) : AttributeSet(); @@ -583,14 +580,14 @@ AttributeSet AttributeSet::getFnAttributes() const { // FIXME: Remove. return pImpl && hasAttributes(FunctionIndex) ? AttributeSet::get(pImpl->getContext(), - ArrayRef >( + ArrayRef >( std::make_pair(FunctionIndex, getAttributes(FunctionIndex)))) : AttributeSet(); } AttributeSet AttributeSet::getImpl(LLVMContext &C, - ArrayRef > Attrs) { LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; @@ -611,7 +608,7 @@ AttributeSet AttributeSet::getImpl(LLVMContext &C, } AttributeSet AttributeSet::get(LLVMContext &C, - ArrayRef > Attrs){ + ArrayRef > Attrs){ // If there are no attributes then return a null AttributesList pointer. if (Attrs.empty()) return AttributeSet(); @@ -625,12 +622,12 @@ AttributeSet AttributeSet::get(LLVMContext &C, } #endif - // Create a vector if (uint64_t, AttributeSetNode*) pairs from the attributes + // Create a vector if (unsigned, AttributeSetNode*) pairs from the attributes // list. - SmallVector, 8> AttrPairVec; - for (ArrayRef >::iterator I = Attrs.begin(), + SmallVector, 8> AttrPairVec; + for (ArrayRef >::iterator I = Attrs.begin(), E = Attrs.end(); I != E; ) { - uint64_t Index = I->first; + unsigned Index = I->first; SmallVector AttrVec; while (I->first == Index && I != E) { AttrVec.push_back(I->second); @@ -645,7 +642,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, } AttributeSet AttributeSet::get(LLVMContext &C, - ArrayRef > Attrs) { // If there are no attributes then return a null AttributesList pointer. if (Attrs.empty()) @@ -657,13 +654,13 @@ AttributeSet AttributeSet::get(LLVMContext &C, AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { if (!B.hasAttributes()) return AttributeSet(); - return get(C, ArrayRef >( + return get(C, ArrayRef >( std::make_pair(Idx, Attribute::get(C, B)))); } AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, ArrayRef Kind) { - SmallVector, 8> Attrs; + SmallVector, 8> Attrs; for (ArrayRef::iterator I = Kind.begin(), E = Kind.end(); I != E; ++I) Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I))); @@ -671,7 +668,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, } AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { - SmallVector, 8> AttrNodeVec; + SmallVector, 8> AttrNodeVec; for (unsigned I = 0, E = Attrs.size(); I != E; ++I) { AttributeSet AS = Attrs[I]; if (!AS.pImpl) continue; -- cgit v1.1 From d3427d3f4094767884f3eafdef3d60c7f5038197 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 29 Jan 2013 00:22:47 +0000 Subject: Add isBGQ method to PPCSubtarget This function will be used in future commits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173729 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCSubtarget.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 416c0f3..3ddae63 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -160,6 +160,8 @@ public: bool isDarwin() const { return TargetTriple.isMacOSX(); } /// isBGP - True if this is a BG/P platform. bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } + /// isBGQ - True if this is a BG/Q platform. 
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } -- cgit v1.1 From c0b3d4c9c252acec01de719d4e756456d5377e6d Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 29 Jan 2013 00:22:49 +0000 Subject: Unroll again after running BBVectorize Because BBVectorize may significantly shorten a loop body, unroll again after vectorization. This is especially important when using runtime or partial unrolling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173730 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 6dc1773..47b2b51 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -214,6 +214,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createGVNPass()); // Remove redundancies else MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); } MPM.add(createAggressiveDCEPass()); // Delete dead instructions -- cgit v1.1 From c22f4aa886443507f8406d30d118fdeeac6a8c6c Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 29 Jan 2013 00:34:06 +0000 Subject: Reorder some functions and add comments. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173733 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 25 +- lib/IR/Attributes.cpp | 669 ++++++++++++++++++++++++------------------------- 2 files changed, 346 insertions(+), 348 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index c00f094..34754e8 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -37,20 +37,19 @@ class AttributeImpl : public FoldingSetNode { void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; public: - explicit AttributeImpl(LLVMContext &C, uint64_t data); + AttributeImpl(LLVMContext &C, Constant *Data) + : Context(C), Data(Data) {} explicit AttributeImpl(LLVMContext &C, Attribute::AttrKind data); AttributeImpl(LLVMContext &C, Attribute::AttrKind data, ArrayRef<Constant*> values); AttributeImpl(LLVMContext &C, StringRef data); - LLVMContext &getContext() { return Context; } - - ArrayRef<Constant*> getValues() const { return Vals; } - bool hasAttribute(Attribute::AttrKind A) const; - bool hasAttributes() const; + LLVMContext &getContext() { return Context; } + ArrayRef<Constant*> getValues() const { return Vals; } + uint64_t getAlignment() const; uint64_t getStackAlignment() const; @@ -62,15 +61,19 @@ public: bool operator<(const AttributeImpl &AI) const; - uint64_t Raw() const; // FIXME: Remove. - - static uint64_t getAttrMask(Attribute::AttrKind Val); - void Profile(FoldingSetNodeID &ID) const { Profile(ID, Data, Vals); } static void Profile(FoldingSetNodeID &ID, Constant *Data, - ArrayRef<Constant*> Vals); + ArrayRef<Constant*> Vals) { + ID.AddPointer(Data); + for (unsigned I = 0, E = Vals.size(); I != E; ++I) + ID.AddPointer(Vals[I]); + } + + // FIXME: Remove these! 
+ uint64_t Raw() const; + static uint64_t getAttrMask(Attribute::AttrKind Val); }; //===----------------------------------------------------------------------===// diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index ac394b7..f56eb7b 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -44,7 +44,8 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { // Otherwise, build a key to look up the existing attributes. LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ID.AddInteger(B.Raw()); + ConstantInt *CI = ConstantInt::get(Type::getInt64Ty(Context), B.Raw()); + ID.AddPointer(CI); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -52,7 +53,7 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributeImpl(Context, B.Raw()); + PA = new AttributeImpl(Context, CI); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -94,24 +95,6 @@ unsigned Attribute::getStackAlignment() const { return pImpl->getStackAlignment(); } -bool Attribute::operator==(AttrKind K) const { - return pImpl && *pImpl == K; -} -bool Attribute::operator!=(AttrKind K) const { - return !(*this == K); -} - -bool Attribute::operator<(Attribute A) const { - if (!pImpl && !A.pImpl) return false; - if (!pImpl) return true; - if (!A.pImpl) return false; - return *pImpl < *A.pImpl; -} - -uint64_t Attribute::Raw() const { - return pImpl ? pImpl->Raw() : 0; -} - std::string Attribute::getAsString() const { std::string Result; if (hasAttribute(Attribute::ZExt)) @@ -186,184 +169,28 @@ std::string Attribute::getAsString() const { return Result; } -//===----------------------------------------------------------------------===// -// AttrBuilder Method Implementations -//===----------------------------------------------------------------------===// - -AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) - : Alignment(0), StackAlignment(0) { - AttributeSetImpl *pImpl = AS.pImpl; - if (!pImpl) return; - - AttrBuilder B; - - for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Idx) continue; - - for (AttributeSetNode::const_iterator II = pImpl->begin(I), - IE = pImpl->end(I); II != IE; ++II) - B.addAttributes(*II); - - break; - } - - if (!B.hasAttributes()) return; - - uint64_t Mask = B.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (uint64_t A = (Mask & AttributeImpl::getAttrMask(I))) { - Attrs.insert(I); - - if (I == Attribute::Alignment) - Alignment = 1ULL << ((A >> 16) - 1); - else if (I == Attribute::StackAlignment) - StackAlignment = 1ULL << ((A >> 26)-1); - } - } -} - -void AttrBuilder::clear() { - Attrs.clear(); - Alignment = StackAlignment = 0; -} - -AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { - Attrs.insert(Val); - return *this; -} - -AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { - Attrs.erase(Val); - if (Val == Attribute::Alignment) - Alignment = 0; - else if (Val == Attribute::StackAlignment) - StackAlignment = 0; - - return *this; -} - -AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) { - uint64_t Mask = Attr.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) - if ((Mask & AttributeImpl::getAttrMask(I)) != 0) - Attrs.insert(I); - - if 
(Attr.getAlignment()) - Alignment = Attr.getAlignment(); - if (Attr.getStackAlignment()) - StackAlignment = Attr.getStackAlignment(); - return *this; -} - -AttrBuilder &AttrBuilder::removeAttributes(Attribute A) { - uint64_t Mask = A.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (Mask & AttributeImpl::getAttrMask(I)) { - Attrs.erase(I); - - if (I == Attribute::Alignment) - Alignment = 0; - else if (I == Attribute::StackAlignment) - StackAlignment = 0; - } - } - - return *this; -} - -AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { - if (Align == 0) return *this; - - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x40000000 && "Alignment too large."); - - Attrs.insert(Attribute::Alignment); - Alignment = Align; - return *this; -} - -AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { - // Default alignment, allow the target to define how to align it. - if (Align == 0) return *this; - - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x100 && "Alignment too large."); - - Attrs.insert(Attribute::StackAlignment); - StackAlignment = Align; - return *this; -} - -AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { - Attrs.insert(I); - - if (I == Attribute::Alignment) - Alignment = 1ULL << ((A >> 16) - 1); - else if (I == Attribute::StackAlignment) - StackAlignment = 1ULL << ((A >> 26)-1); - } - } - - return *this; -} - -bool AttrBuilder::contains(Attribute::AttrKind A) const { - return Attrs.count(A); -} - -bool AttrBuilder::hasAttributes() const { - return !Attrs.empty(); -} - -bool AttrBuilder::hasAttributes(const Attribute &A) const { - return Raw() & A.Raw(); +bool Attribute::operator==(AttrKind K) const { + return pImpl && *pImpl == K; } - -bool AttrBuilder::hasAlignmentAttr() const { - return Alignment != 0; +bool Attribute::operator!=(AttrKind K) const { + return !(*this == K); } -uint64_t AttrBuilder::Raw() const { - uint64_t Mask = 0; - - for (DenseSet::const_iterator I = Attrs.begin(), - E = Attrs.end(); I != E; ++I) { - Attribute::AttrKind Kind = *I; - - if (Kind == Attribute::Alignment) - Mask |= (Log2_32(Alignment) + 1) << 16; - else if (Kind == Attribute::StackAlignment) - Mask |= (Log2_32(StackAlignment) + 1) << 26; - else - Mask |= AttributeImpl::getAttrMask(Kind); - } - - return Mask; +bool Attribute::operator<(Attribute A) const { + if (!pImpl && !A.pImpl) return false; + if (!pImpl) return true; + if (!A.pImpl) return false; + return *pImpl < *A.pImpl; } -bool AttrBuilder::operator==(const AttrBuilder &B) { - SmallVector This(Attrs.begin(), Attrs.end()); - SmallVector That(B.Attrs.begin(), B.Attrs.end()); - return This == That; +uint64_t Attribute::Raw() const { + return pImpl ? 
pImpl->Raw() : 0; } //===----------------------------------------------------------------------===// // AttributeImpl Definition //===----------------------------------------------------------------------===// -AttributeImpl::AttributeImpl(LLVMContext &C, uint64_t data) - : Context(C) { - Data = ConstantInt::get(Type::getInt64Ty(C), data); -} AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind data) : Context(C) { Data = ConstantInt::get(Type::getInt64Ty(C), data); @@ -380,6 +207,24 @@ AttributeImpl::AttributeImpl(LLVMContext &C, StringRef data) Data = ConstantDataArray::getString(C, data); } +bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { + return (Raw() & getAttrMask(A)) != 0; +} + +bool AttributeImpl::hasAttributes() const { + return Raw() != 0; +} + +uint64_t AttributeImpl::getAlignment() const { + uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); + return 1ULL << ((Mask >> 16) - 1); +} + +uint64_t AttributeImpl::getStackAlignment() const { + uint64_t Mask = Raw() & getAttrMask(Attribute::StackAlignment); + return 1ULL << ((Mask >> 26) - 1); +} + bool AttributeImpl::operator==(Attribute::AttrKind Kind) const { if (ConstantInt *CI = dyn_cast(Data)) return CI->getZExtValue() == Kind; @@ -429,6 +274,7 @@ uint64_t AttributeImpl::Raw() const { } uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { + // FIXME: Remove this. switch (Val) { case Attribute::EndAttrKinds: case Attribute::AttrKindEmptyKey: @@ -470,35 +316,6 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { llvm_unreachable("Unsupported attribute type"); } -bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { - return (Raw() & getAttrMask(A)) != 0; -} - -bool AttributeImpl::hasAttributes() const { - return Raw() != 0; -} - -uint64_t AttributeImpl::getAlignment() const { - uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); - return 1ULL << ((Mask >> 16) - 1); -} - -uint64_t AttributeImpl::getStackAlignment() const { - uint64_t Mask = Raw() & getAttrMask(Attribute::StackAlignment); - return 1ULL << ((Mask >> 26) - 1); -} - -void AttributeImpl::Profile(FoldingSetNodeID &ID, Constant *Data, - ArrayRef Vals) { - ID.AddInteger(cast(Data)->getZExtValue()); -#if 0 - // FIXME: Not yet supported. - for (ArrayRef::iterator I = Vals.begin(), E = Vals.end(); - I != E; ++I) - ID.AddPointer(*I); -#endif -} - //===----------------------------------------------------------------------===// // AttributeSetNode Definition //===----------------------------------------------------------------------===// @@ -554,44 +371,15 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const { } //===----------------------------------------------------------------------===// -// AttributeSet Method Implementations +// AttributeSet Construction and Mutation Methods //===----------------------------------------------------------------------===// -AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { - // FIXME: Remove. - return pImpl && hasAttributes(Idx) ? - AttributeSet::get(pImpl->getContext(), - ArrayRef >( - std::make_pair(Idx, getAttributes(Idx)))) : - AttributeSet(); -} - -AttributeSet AttributeSet::getRetAttributes() const { - // FIXME: Remove. - return pImpl && hasAttributes(ReturnIndex) ? - AttributeSet::get(pImpl->getContext(), - ArrayRef >( - std::make_pair(ReturnIndex, - getAttributes(ReturnIndex)))) : - AttributeSet(); -} - -AttributeSet AttributeSet::getFnAttributes() const { - // FIXME: Remove. - return pImpl && hasAttributes(FunctionIndex) ? 
- AttributeSet::get(pImpl->getContext(), - ArrayRef >( - std::make_pair(FunctionIndex, - getAttributes(FunctionIndex)))) : - AttributeSet(); -} - -AttributeSet AttributeSet::getImpl(LLVMContext &C, - ArrayRef > Attrs) { - LLVMContextImpl *pImpl = C.pImpl; - FoldingSetNodeID ID; - AttributeSetImpl::Profile(ID, Attrs); +AttributeSet AttributeSet::getImpl(LLVMContext &C, + ArrayRef > Attrs) { + LLVMContextImpl *pImpl = C.pImpl; + FoldingSetNodeID ID; + AttributeSetImpl::Profile(ID, Attrs); void *InsertPoint; AttributeSetImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID, InsertPoint); @@ -668,6 +456,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, } AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { + if (Attrs.empty()) return AttributeSet(); + SmallVector, 8> AttrNodeVec; for (unsigned I = 0, E = Attrs.size(); I != E; ++I) { AttributeSet AS = Attrs[I]; @@ -675,85 +465,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { AttrNodeVec.append(AS.pImpl->AttrNodes.begin(), AS.pImpl->AttrNodes.end()); } - return get(C, AttrNodeVec); -} - -/// \brief Return the number of slots used in this attribute list. This is the -/// number of arguments that have an attribute set on them (including the -/// function itself). -unsigned AttributeSet::getNumSlots() const { - return pImpl ? pImpl->getNumAttributes() : 0; -} - -uint64_t AttributeSet::getSlotIndex(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumAttributes() && - "Slot # out of range!"); - return pImpl->getSlotIndex(Slot); -} - -AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumAttributes() && - "Slot # out of range!"); - return pImpl->getSlotAttributes(Slot); -} - -bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ - return getAttributes(Index).hasAttribute(Kind); -} - -bool AttributeSet::hasAttributes(unsigned Index) const { - return getAttributes(Index).hasAttributes(); -} - -std::string AttributeSet::getAsString(unsigned Index) const { - return getAttributes(Index).getAsString(); -} - -unsigned AttributeSet::getParamAlignment(unsigned Idx) const { - return getAttributes(Idx).getAlignment(); -} - -unsigned AttributeSet::getStackAlignment(unsigned Index) const { - return getAttributes(Index).getStackAlignment(); -} - -uint64_t AttributeSet::Raw(unsigned Index) const { - // FIXME: Remove this. - return pImpl ? pImpl->Raw(Index) : 0; -} - -/// \brief The attributes for the specified index are returned. -/// -/// FIXME: This shouldn't return 'Attribute'. -Attribute AttributeSet::getAttributes(unsigned Idx) const { - if (pImpl == 0) return Attribute(); - - // Loop through to find the attribute we want. - for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Idx) continue; - - AttrBuilder B; - for (AttributeSetImpl::const_iterator II = pImpl->begin(I), - IE = pImpl->end(I); II != IE; ++II) - B.addAttributes(*II); - return Attribute::get(pImpl->getContext(), B); - } - - return Attribute(); -} - -/// hasAttrSomewhere - Return true if the specified attribute is set for at -/// least one parameter or for the return value. 
-bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { - if (pImpl == 0) return false; - - for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) - for (AttributeSetImpl::const_iterator II = pImpl->begin(I), - IE = pImpl->end(I); II != IE; ++II) - if (II->hasAttribute(Attr)) - return true; - - return false; + return getImpl(C, AttrNodeVec); } AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, @@ -862,8 +574,121 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, return get(C, AttrSet); } +//===----------------------------------------------------------------------===// +// AttributeSet Accessor Methods +//===----------------------------------------------------------------------===// + +AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { + return pImpl && hasAttributes(Idx) ? + AttributeSet::get(pImpl->getContext(), + ArrayRef >( + std::make_pair(Idx, getAttributes(Idx)))) : + AttributeSet(); +} + +AttributeSet AttributeSet::getRetAttributes() const { + return pImpl && hasAttributes(ReturnIndex) ? + AttributeSet::get(pImpl->getContext(), + ArrayRef >( + std::make_pair(ReturnIndex, + getAttributes(ReturnIndex)))) : + AttributeSet(); +} + +AttributeSet AttributeSet::getFnAttributes() const { + return pImpl && hasAttributes(FunctionIndex) ? + AttributeSet::get(pImpl->getContext(), + ArrayRef >( + std::make_pair(FunctionIndex, + getAttributes(FunctionIndex)))) : + AttributeSet(); +} + +bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ + return getAttributes(Index).hasAttribute(Kind); +} + +bool AttributeSet::hasAttributes(unsigned Index) const { + return getAttributes(Index).hasAttributes(); +} + +/// \brief Return true if the specified attribute is set for at least one +/// parameter or for the return value. +bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { + if (pImpl == 0) return false; + + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) + for (AttributeSetImpl::const_iterator II = pImpl->begin(I), + IE = pImpl->end(I); II != IE; ++II) + if (II->hasAttribute(Attr)) + return true; + + return false; +} + +unsigned AttributeSet::getParamAlignment(unsigned Idx) const { + return getAttributes(Idx).getAlignment(); +} + +unsigned AttributeSet::getStackAlignment(unsigned Index) const { + return getAttributes(Index).getStackAlignment(); +} + +std::string AttributeSet::getAsString(unsigned Index) const { + return getAttributes(Index).getAsString(); +} + +/// \brief The attributes for the specified index are returned. +/// +/// FIXME: This shouldn't return 'Attribute'. +Attribute AttributeSet::getAttributes(unsigned Idx) const { + if (pImpl == 0) return Attribute(); + + // Loop through to find the attribute we want. + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { + if (pImpl->getSlotIndex(I) != Idx) continue; + + AttrBuilder B; + for (AttributeSetImpl::const_iterator II = pImpl->begin(I), + IE = pImpl->end(I); II != IE; ++II) + B.addAttributes(*II); + return Attribute::get(pImpl->getContext(), B); + } + + return Attribute(); +} + +//===----------------------------------------------------------------------===// +// AttributeSet Introspection Methods +//===----------------------------------------------------------------------===// + +/// \brief Return the number of slots used in this attribute list. This is the +/// number of arguments that have an attribute set on them (including the +/// function itself). 
+unsigned AttributeSet::getNumSlots() const { + return pImpl ? pImpl->getNumAttributes() : 0; +} + +uint64_t AttributeSet::getSlotIndex(unsigned Slot) const { + assert(pImpl && Slot < pImpl->getNumAttributes() && + "Slot # out of range!"); + return pImpl->getSlotIndex(Slot); +} + +AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const { + assert(pImpl && Slot < pImpl->getNumAttributes() && + "Slot # out of range!"); + return pImpl->getSlotAttributes(Slot); +} + +uint64_t AttributeSet::Raw(unsigned Index) const { + // FIXME: Remove this. + return pImpl ? pImpl->Raw(Index) : 0; +} + void AttributeSet::dump() const { dbgs() << "PAL[\n"; + for (unsigned i = 0, e = getNumSlots(); i < e; ++i) { uint64_t Index = getSlotIndex(i); dbgs() << " { "; @@ -878,6 +703,176 @@ void AttributeSet::dump() const { } //===----------------------------------------------------------------------===// +// AttrBuilder Method Implementations +//===----------------------------------------------------------------------===// + +AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) + : Alignment(0), StackAlignment(0) { + AttributeSetImpl *pImpl = AS.pImpl; + if (!pImpl) return; + + AttrBuilder B; + + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { + if (pImpl->getSlotIndex(I) != Idx) continue; + + for (AttributeSetNode::const_iterator II = pImpl->begin(I), + IE = pImpl->end(I); II != IE; ++II) + B.addAttributes(*II); + + break; + } + + if (!B.hasAttributes()) return; + + uint64_t Mask = B.Raw(); + + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) { + if (uint64_t A = (Mask & AttributeImpl::getAttrMask(I))) { + Attrs.insert(I); + + if (I == Attribute::Alignment) + Alignment = 1ULL << ((A >> 16) - 1); + else if (I == Attribute::StackAlignment) + StackAlignment = 1ULL << ((A >> 26)-1); + } + } +} + +void AttrBuilder::clear() { + Attrs.clear(); + Alignment = StackAlignment = 0; +} + +AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { + Attrs.insert(Val); + return *this; +} + +AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { + Attrs.erase(Val); + if (Val == Attribute::Alignment) + Alignment = 0; + else if (Val == Attribute::StackAlignment) + StackAlignment = 0; + + return *this; +} + +AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) { + uint64_t Mask = Attr.Raw(); + + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) + if ((Mask & AttributeImpl::getAttrMask(I)) != 0) + Attrs.insert(I); + + if (Attr.getAlignment()) + Alignment = Attr.getAlignment(); + if (Attr.getStackAlignment()) + StackAlignment = Attr.getStackAlignment(); + return *this; +} + +AttrBuilder &AttrBuilder::removeAttributes(Attribute A) { + uint64_t Mask = A.Raw(); + + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) { + if (Mask & AttributeImpl::getAttrMask(I)) { + Attrs.erase(I); + + if (I == Attribute::Alignment) + Alignment = 0; + else if (I == Attribute::StackAlignment) + StackAlignment = 0; + } + } + + return *this; +} + +AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { + if (Align == 0) return *this; + + assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); + assert(Align <= 0x40000000 && "Alignment too large."); + + Attrs.insert(Attribute::Alignment); + Alignment = Align; + return *this; +} + +AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { + // Default 
alignment, allow the target to define how to align it. + if (Align == 0) return *this; + + assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); + assert(Align <= 0x100 && "Alignment too large."); + + Attrs.insert(Attribute::StackAlignment); + StackAlignment = Align; + return *this; +} + +bool AttrBuilder::contains(Attribute::AttrKind A) const { + return Attrs.count(A); +} + +bool AttrBuilder::hasAttributes() const { + return !Attrs.empty(); +} + +bool AttrBuilder::hasAttributes(const Attribute &A) const { + return Raw() & A.Raw(); +} + +bool AttrBuilder::hasAlignmentAttr() const { + return Alignment != 0; +} + +bool AttrBuilder::operator==(const AttrBuilder &B) { + SmallVector This(Attrs.begin(), Attrs.end()); + SmallVector That(B.Attrs.begin(), B.Attrs.end()); + return This == That; +} + +AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; + I = Attribute::AttrKind(I + 1)) { + if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { + Attrs.insert(I); + + if (I == Attribute::Alignment) + Alignment = 1ULL << ((A >> 16) - 1); + else if (I == Attribute::StackAlignment) + StackAlignment = 1ULL << ((A >> 26)-1); + } + } + + return *this; +} + +uint64_t AttrBuilder::Raw() const { + uint64_t Mask = 0; + + for (DenseSet::const_iterator I = Attrs.begin(), + E = Attrs.end(); I != E; ++I) { + Attribute::AttrKind Kind = *I; + + if (Kind == Attribute::Alignment) + Mask |= (Log2_32(Alignment) + 1) << 16; + else if (Kind == Attribute::StackAlignment) + Mask |= (Log2_32(StackAlignment) + 1) << 26; + else + Mask |= AttributeImpl::getAttrMask(Kind); + } + + return Mask; +} + +//===----------------------------------------------------------------------===// // AttributeFuncs Function Defintions //===----------------------------------------------------------------------===// @@ -900,9 +895,9 @@ Attribute AttributeFuncs::typeIncompatible(Type *Ty) { return Attribute::get(Ty->getContext(), Incompatible); } -/// encodeLLVMAttributesForBitcode - This returns an integer containing an -/// encoding of all the LLVM attributes found in the given attribute bitset. -/// Any change to this encoding is a breaking change to bitcode compatibility. +/// \brief This returns an integer containing an encoding of all the LLVM +/// attributes found in the given attribute bitset. Any change to this encoding +/// is a breaking change to bitcode compatibility. uint64_t AttributeFuncs::encodeLLVMAttributesForBitcode(AttributeSet Attrs, unsigned Index) { // FIXME: It doesn't make sense to store the alignment information as an @@ -921,9 +916,9 @@ uint64_t AttributeFuncs::encodeLLVMAttributesForBitcode(AttributeSet Attrs, return EncodedAttrs; } -/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing -/// the LLVM attributes that have been decoded from the given integer. This -/// function must stay in sync with 'encodeLLVMAttributesForBitcode'. +/// \brief This returns an attribute bitset containing the LLVM attributes that +/// have been decoded from the given integer. This function must stay in sync +/// with 'encodeLLVMAttributesForBitcode'. Attribute AttributeFuncs::decodeLLVMAttributesForBitcode(LLVMContext &C, uint64_t EncodedAttrs){ // The alignment is stored as a 16-bit raw value from bits 31--16. 
We shift -- cgit v1.1 From 817abdd8b055059e5930a15704b9f52da4236456 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 29 Jan 2013 00:48:16 +0000 Subject: S'more small non-functional changes in comments and #includes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173738 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index f56eb7b..0bc9262 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/Attributes.h" #include "AttributeImpl.h" #include "LLVMContextImpl.h" -#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Type.h" #include "llvm/Support/Atomic.h" @@ -28,7 +27,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// Attribute Implementation +// Attribute Construction Methods //===----------------------------------------------------------------------===// Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { @@ -72,6 +71,10 @@ Attribute Attribute::getWithStackAlignment(LLVMContext &Context, return get(Context, B.addStackAlignmentAttr(Align)); } +//===----------------------------------------------------------------------===// +// Attribute Accessor Methods +//===----------------------------------------------------------------------===// + bool Attribute::hasAttribute(AttrKind Val) const { return pImpl && pImpl->hasAttribute(Val); } -- cgit v1.1 From b8d2c92ac42eca3d4242bd7ce6c6e87a4d887727 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Tue, 29 Jan 2013 00:50:18 +0000 Subject: Formatting correction git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173739 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 3645a4d..7dc295f 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -82,8 +82,7 @@ static LineNumberInfo LineStartToIntelJITFormat( static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress, uintptr_t Address, - DILineInfo Line) -{ + DILineInfo Line) { LineNumberInfo Result; Result.Offset = Address - StartAddress; -- cgit v1.1 From 8fbc0c296ef067150f3228e389ae04cf7b3b1992 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 29 Jan 2013 01:02:03 +0000 Subject: Convert the AttrBuilder into a list of Attributes instead of one Attribute object that holds all of its attributes. 
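The packed-integer representation that this change begins to move away from can be seen in the AttrBuilder::Raw() / addRawValue() pair above: an alignment of 2^N is not a simple presence bit, so Raw() stores the value N + 1 in a small field at bit 16 (bit 26 for stack alignment), and a zero field means the attribute is absent. A minimal standalone sketch of that round trip (illustrative only, not the LLVM API; the helper names are invented):

    #include <cassert>
    #include <cstdint>

    // Encode a power-of-two alignment the way AttrBuilder::Raw() does:
    // store log2(Align) + 1 in the field at bit 16.
    static uint64_t encodeAlignment(uint64_t Align) {
      uint64_t Log2 = 0;
      while ((1ULL << Log2) < Align)
        ++Log2;
      return (Log2 + 1) << 16;
    }

    // Decode the way AttrBuilder::addRawValue() does: a non-zero field F
    // means an alignment of 1 << (F - 1).
    static uint64_t decodeAlignment(uint64_t Mask) {
      uint64_t Field = (Mask >> 16) & 0x1F; // 5 bits covers the 1 << 30 max
      return Field ? 1ULL << (Field - 1) : 0;
    }

    int main() {
      assert(decodeAlignment(encodeAlignment(16)) == 16); // 16 -> field 5 -> 16
      assert(decodeAlignment(0) == 0);                    // zero field: absent
      return 0;
    }

The +1 bias keeps a genuine alignment of 1 (field value 1) distinguishable from "no alignment attribute" (field value 0), which is why the code stores Log2_32(Alignment) + 1 rather than the logarithm itself.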
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173742 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 0bc9262..d947fff 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -445,8 +445,21 @@ AttributeSet AttributeSet::get(LLVMContext &C, AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { if (!B.hasAttributes()) return AttributeSet(); - return get(C, ArrayRef >( - std::make_pair(Idx, Attribute::get(C, B)))); + + SmallVector, 8> Attrs; + for (AttrBuilder::iterator I = B.begin(), E = B.end(); I != E; ++I) { + Attribute::AttrKind Kind = *I; + if (Kind == Attribute::Alignment) + Attrs.push_back(std::make_pair(Idx, Attribute:: + getWithAlignment(C, B.getAlignment()))); + else if (Kind == Attribute::StackAlignment) + Attrs.push_back(std::make_pair(Idx, Attribute:: + getWithStackAlignment(C, B.getStackAlignment()))); + else + Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind))); + } + + return get(C, Attrs); } AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, -- cgit v1.1 From 8232ece5c1e57efe54342fb35610497d50bf894f Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 29 Jan 2013 01:43:29 +0000 Subject: Use an AttrBuilder to generate the correct AttributeSet. We no longer accept an encoded integer as representing all of the attributes. Convert this via the AttrBuilder class into an AttributeSet with the correct representation (an AttributeSetImpl that holds a list of Attribute objects). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173750 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 13 ++++--------- lib/IR/Attributes.cpp | 24 ++++++++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 2e1a512..ca299c0 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -464,15 +464,10 @@ bool BitcodeReader::ParseAttributeBlock() { return Error("Invalid ENTRY record"); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - Attribute ReconstitutedAttr = - AttributeFuncs::decodeLLVMAttributesForBitcode(Context, Record[i+1]); - Record[i+1] = ReconstitutedAttr.Raw(); - } - - for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - AttrBuilder B(Record[i+1]); - if (B.hasAttributes()) - Attrs.push_back(AttributeSet::get(Context, Record[i], B)); + AttrBuilder B; + AttributeFuncs::decodeLLVMAttributesForBitcode(Context, B, + Record[i+1]); + Attrs.push_back(AttributeSet::get(Context, Record[i], B)); } MAttributes.push_back(AttributeSet::get(Context, Attrs)); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d947fff..8250330 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -377,9 +377,9 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const { // AttributeSet Construction and Mutation Methods //===----------------------------------------------------------------------===// -AttributeSet AttributeSet::getImpl(LLVMContext &C, - ArrayRef > Attrs) { +AttributeSet +AttributeSet::getImpl(LLVMContext &C, + ArrayRef > Attrs) { LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; AttributeSetImpl::Profile(ID, Attrs); @@ -855,6 +855,8 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { } AttrBuilder &AttrBuilder::addRawValue(uint64_t 
Val) { + if (!Val) return *this; + for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { @@ -914,6 +916,7 @@ Attribute AttributeFuncs::typeIncompatible(Type *Ty) { /// \brief This returns an integer containing an encoding of all the LLVM /// attributes found in the given attribute bitset. Any change to this encoding /// is a breaking change to bitcode compatibility. +/// N.B. This should be used only by the bitcode reader! uint64_t AttributeFuncs::encodeLLVMAttributesForBitcode(AttributeSet Attrs, unsigned Index) { // FIXME: It doesn't make sense to store the alignment information as an @@ -932,21 +935,22 @@ uint64_t AttributeFuncs::encodeLLVMAttributesForBitcode(AttributeSet Attrs, return EncodedAttrs; } -/// \brief This returns an attribute bitset containing the LLVM attributes that -/// have been decoded from the given integer. This function must stay in sync -/// with 'encodeLLVMAttributesForBitcode'. -Attribute AttributeFuncs::decodeLLVMAttributesForBitcode(LLVMContext &C, - uint64_t EncodedAttrs){ +/// \brief This fills an AttrBuilder object with the LLVM attributes that have +/// been decoded from the given integer. This function must stay in sync with +/// 'encodeLLVMAttributesForBitcode'. +/// N.B. This should be used only by the bitcode reader! +void AttributeFuncs::decodeLLVMAttributesForBitcode(LLVMContext &C, + AttrBuilder &B, + uint64_t EncodedAttrs) { // The alignment is stored as a 16-bit raw value from bits 31--16. We shift // the bits above 31 down by 11 bits. unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; assert((!Alignment || isPowerOf2_32(Alignment)) && "Alignment must be a power of two."); - AttrBuilder B(EncodedAttrs & 0xffff); + B.addRawValue(EncodedAttrs & 0xffff); if (Alignment) B.addAlignmentAttr(Alignment); B.addRawValue((EncodedAttrs & (0xffffULL << 32)) >> 11); - return Attribute::get(C, B); } -- cgit v1.1 From 8688a58c53b46d2dda9bf50dafd5195790a7ed58 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 29 Jan 2013 02:32:37 +0000 Subject: Teach SDISel to combine fsin / fcos into a fsincos node if the following conditions are met: 1. They share the same operand and are in the same BB. 2. Both outputs are used. 3. The target has a native instruction that maps to ISD::FSINCOS node or the target provides a sincos library call. Implemented the generic optimization in sdisel and enabled it for Mac OSX. Also added an additional optimization for x86_64 Mac OSX by using an alternative entry point __sincos_stret which returns the two results in xmm0 / xmm1. 
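The shape of code that satisfies these conditions, as hypothetical user source rather than anything from the patch:

    #include <cmath>

    struct SinCos { double Sin, Cos; };

    // Both results are used, both calls share the operand Theta, and both
    // sit in the same basic block, so the legalizer can combine them into
    // one ISD::FSINCOS node -> a single sincos() libcall (or, on x86_64
    // Mac OSX, a call to __sincos_stret returning in xmm0 / xmm1).
    SinCos rotate(double Theta) {
      return SinCos{std::sin(Theta), std::cos(Theta)};
    }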
rdar://13087969 PR13204 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 139 ++++++++++++++++++++++-- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1 + lib/CodeGen/TargetLoweringBase.cpp | 7 ++ lib/Target/ARM/ARMISelLowering.cpp | 2 + lib/Target/Hexagon/HexagonISelLowering.cpp | 2 + lib/Target/MBlaze/MBlazeISelLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.cpp | 2 + lib/Target/PowerPC/PPCISelLowering.cpp | 2 + lib/Target/Sparc/SparcISelLowering.cpp | 2 + lib/Target/X86/X86ISelLowering.cpp | 95 +++++++++++++--- lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/X86/X86Subtarget.cpp | 5 + lib/Target/X86/X86Subtarget.h | 4 + 13 files changed, 240 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d37edab..6a3e03b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -102,7 +102,8 @@ private: SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -110,6 +111,7 @@ private: RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results); + void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -2095,6 +2097,106 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Results.push_back(Rem); } +/// isSinCosLibcallAvailable - Return true if sincos libcall is available. +static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + return TLI.getLibcallName(LC) != 0; +} + +/// useSinCos - Only issue sincos libcall if both sin and cos are +/// needed. +static bool useSinCos(SDNode *Node) { + unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN + ? ISD::FCOS : ISD::FSIN; + + SDValue Op0 = Node->getOperand(0); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + // The other user might have been turned into sincos already. + if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) + return true; + } + return false; +} + +/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos +/// pairs. 
+void +SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, + SmallVectorImpl &Results) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // Pass the argument. + Entry.Node = Node->getOperand(0); + Entry.Ty = RetTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Pass the return address of sin. + SDValue SinPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = SinPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Also pass the return address of the cos. + SDValue CosPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = CosPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + DebugLoc dl = Node->getDebugLoc(); + TargetLowering:: + CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair CallInfo = TLI.LowerCallTo(CLI); + + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, + MachinePointerInfo(), false, false, false, 0)); + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, + MachinePointerInfo(), false, false, false, 0)); +} + /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -3041,14 +3143,33 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); + case ISD::FCOS: { + EVT VT = Node->getValueType(0); + bool isSIN = Node->getOpcode() == ISD::FSIN; + // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / + // fcos which share the same operand and both are used. 
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || + isSinCosLibcallAvailable(Node, TLI)) + && useSinCos(Node)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); + if (!isSIN) + Tmp1 = Tmp1.getValue(1); + Results.push_back(Tmp1); + } else if (isSIN) { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + } break; - case ISD::FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); + } + case ISD::FSINCOS: + // Expand into sincos libcall. + ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, @@ -3181,7 +3302,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UREM: case ISD::SREM: { EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; @@ -3192,6 +3312,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If div is legal, it's better to do the normal expansion !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && useDivRem(Node, isSigned, false))) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5701b13..3b5823b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -140,6 +140,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; + case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 6284d52..10aa9d6 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -340,6 +340,13 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + + // These are generally not available. + Names[RTLIB::SINCOS_F32] = 0; + Names[RTLIB::SINCOS_F64] = 0; + Names[RTLIB::SINCOS_F80] = 0; + Names[RTLIB::SINCOS_F128] = 0; + Names[RTLIB::SINCOS_PPCF128] = 0; } /// InitLibcallCallingConvs - Set default libcall CallingConvs. 
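For reference, ExpandSinCosLibCall above passes the value operand first and then two stack temporaries by pointer, which lines up with the sincos() routine shipped by GNU and Darwin math libraries (the exact signature is inferred here from the expansion's argument list). A rough source-level equivalent of the expanded sequence:

    // Provided by libm on GNU/Darwin systems.
    extern "C" void sincos(double X, double *Sin, double *Cos);

    double sumSinCos(double X) {
      double S, C;        // stand-ins for the two CreateStackTemporary slots
      sincos(X, &S, &C);  // one call computes both results
      return S + C;       // mirrors the two loads pushed into Results
    }

Routing both results through memory is exactly the traffic that the MacOSX-specific __sincos_stret entry point in the X86 changes below avoids.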
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bdbf45c..3d283a5 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -781,6 +781,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 1a0e581..9c7243b 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1364,6 +1364,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 8a9f092..5b3c6fe 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -81,6 +81,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 8925dde..7b8557d 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -421,6 +421,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9966b2c..9a68927 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -132,11 +132,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 
setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 168640f..7664abd 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -759,10 +759,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a4eae0a..ca606da 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -605,10 +605,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Expand FP immediates into loads from the stack, except for the special // cases we handle. @@ -633,8 +635,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Special cases we handle for FP constants. addLegalFPImmediate(APFloat(+0.0f)); // xorps @@ -644,8 +647,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f64 , Expand); - setOperationAction(ISD::FCOS , MVT::f64 , Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); } } else if (!TM.Options.UseSoftFloat) { // f32 and f64 in x87. 
@@ -659,10 +663,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f32 , Expand); - setOperationAction(ISD::FSIN , MVT::f64 , Expand); - setOperationAction(ISD::FCOS , MVT::f32 , Expand); - setOperationAction(ISD::FCOS , MVT::f64 , Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } addLegalFPImmediate(APFloat(+0.0)); // FLD0 addLegalFPImmediate(APFloat(+1.0)); // FLD1 @@ -699,8 +705,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f80 , Expand); - setOperationAction(ISD::FCOS , MVT::f80 , Expand); + setOperationAction(ISD::FSIN , MVT::f80, Expand); + setOperationAction(ISD::FCOS , MVT::f80, Expand); + setOperationAction(ISD::FSINCOS, MVT::f80, Expand); } setOperationAction(ISD::FFLOOR, MVT::f80, Expand); @@ -748,7 +755,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); @@ -1281,6 +1290,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setLibcallName(RTLIB::SRA_I128, 0); } + // Combine sin / cos into one node or libcall if possible. + if (Subtarget->hasSinCos()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + if (Subtarget->isTargetDarwin() && Subtarget->is64Bit()) { + // For MacOSX, we don't want to the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret to avoid memory + // traffic. + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + } + } + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -12014,6 +12036,50 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin()); + + // For MacOSX, we want to call an alternative entry point: __sincos_stret, + // which returns the values in two XMM registers. + DebugLoc dl = Op.getDebugLoc(); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + const char *LibcallName = (ArgVT == MVT::f64) + ? 
"__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, + false, false, false, false, 0, + CallingConv::C, /*isTaillCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed*/true, + Callee, Args, DAG, dl); + std::pair CallResult = LowerCallTo(CLI); +#if 1 + return CallResult.first; +#else + SDValue RetSin = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(0)); + SDValue RetCos = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(1)); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, RetSin, RetCos); +#endif +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12096,6 +12162,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5e84e27..6758ed1 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -838,8 +838,8 @@ namespace llvm { SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 53c28f4..dad95c6 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -155,6 +155,11 @@ const char *X86Subtarget::getBZeroEntry() const { return 0; } +bool X86Subtarget::hasSinCos() const { + return getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 9); +} + /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index b325f62..eb587a5 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -328,6 +328,10 @@ public: /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; + + /// This function returns true if the target has sincos() routine in its + /// compiler runtime or math libraries. + bool hasSinCos() const; /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, -- cgit v1.1 From 3b0f537867c7c661f63938cf71a1031b652c87a2 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 03:02:59 +0000 Subject: Removed some cruft from ObjCARCAliasAnalysis.cpp. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173759 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index 9747ce0..5d09e5a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -21,18 +21,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "objc-arc-aa"
-
 #include "ObjCARC.h"
 #include "ObjCARCAliasAnalysis.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassAnalysisSupport.h"
-#include "llvm/PassSupport.h"
-
-namespace llvm {
-  class Function;
-  class Value;
-}
 
 #include "llvm/IR/Instruction.h"
 #include "llvm/InitializePasses.h"
--
cgit v1.1


From 3a57c37964adfbbf83b4b309a2ceda43ba6d8231 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 29 Jan 2013 03:03:03 +0000
Subject: Extracted ObjCARCContract from ObjCARCOpts into its own file.

This also required adding two headers, DependencyAnalysis.h and
ProvenanceAnalysis.h, and a new file, DependencyAnalysis.cpp, to untangle the
dependencies between ObjCARCContract and ObjCARCOpts.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173760 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/CMakeLists.txt         |    3 +
 lib/Transforms/ObjCARC/DependencyAnalysis.cpp |  262 ++++++
 lib/Transforms/ObjCARC/DependencyAnalysis.h   |   79 ++
 lib/Transforms/ObjCARC/ObjCARC.h              |  153 ++++
 lib/Transforms/ObjCARC/ObjCARCContract.cpp    |  537 +++++++++++
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp        | 1185 +------------------------
 lib/Transforms/ObjCARC/ObjCARCUtil.cpp        |   93 ++
 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp |  177 ++++
 lib/Transforms/ObjCARC/ProvenanceAnalysis.h   |   79 ++
 9 files changed, 1387 insertions(+), 1181 deletions(-)
 create mode 100644 lib/Transforms/ObjCARC/DependencyAnalysis.cpp
 create mode 100644 lib/Transforms/ObjCARC/DependencyAnalysis.h
 create mode 100644 lib/Transforms/ObjCARC/ObjCARCContract.cpp
 create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
 create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysis.h

(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
index e710044..233deb3 100644
--- a/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -5,6 +5,9 @@ add_llvm_library(LLVMObjCARCOpts
   ObjCARCAPElim.cpp
   ObjCARCAliasAnalysis.cpp
   ObjCARCUtil.cpp
+  ObjCARCContract.cpp
+  DependencyAnalysis.cpp
+  ProvenanceAnalysis.cpp
   )
 
 add_dependencies(LLVMObjCARCOpts intrinsics_gen)
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
new file mode 100644
index 0000000..5640009
--- /dev/null
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -0,0 +1,262 @@
+//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-dependency" +#include "ObjCARC.h" +#include "ProvenanceAnalysis.h" +#include "DependencyAnalysis.h" + +#include "llvm/Support/CFG.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// Test whether the given instruction can result in a reference count +/// modification (positive or negative) for the pointer's object. +bool +llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, + InstructionClass Class) { + switch (Class) { + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_User: + // These operations never directly modify a reference count. + return false; + default: break; + } + + ImmutableCallSite CS = static_cast(Inst); + assert(CS && "Only calls can alter reference counts!"); + + // See if AliasAnalysis can help us with the call. + AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + if (AliasAnalysis::onlyReadsMemory(MRB)) + return false; + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + const Value *Op = *I; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; + } + + // Assume the worst. + return true; +} + +/// Test whether the given instruction can "use" the given pointer's object in a +/// way that requires the reference count to be positive. +bool +llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class) { + // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. + if (Class == IC_Call) + return false; + + // Consider various instructions which may have pointer arguments which are + // not "uses". + if (const ICmpInst *ICI = dyn_cast(Inst)) { + // Comparing a pointer with null, or any other constant, isn't really a use, + // because we don't care what the pointer points to, or about the values + // of any other dynamic reference-counted pointers. + if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) + return false; + } else if (ImmutableCallSite CS = static_cast(Inst)) { + // For calls, just check the arguments (and not the callee operand). + for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), + OE = CS.arg_end(); OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; + } else if (const StoreInst *SI = dyn_cast(Inst)) { + // Special-case stores, because we don't care about the stored value, just + // the store address. + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + // If we can't tell what the underlying object was, assume there is a + // dependence. + return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); + } + + // Check each operand for a match. 
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; +} + +/// Test if there can be dependencies on Inst through Arg. This function only +/// tests dependencies relevant for removing pairs of calls. +bool +llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst, + const Value *Arg, ProvenanceAnalysis &PA) { + // If we've reached the definition of Arg, stop. + if (Inst == Arg) + return true; + + switch (Flavor) { + case NeedsPositiveRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanUse(Inst, Arg, PA, Class); + } + } + + case AutoreleasePoolBoundary: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // These mark the end and begin of an autorelease pool scope. + return true; + default: + // Nothing else does this. + return false; + } + } + + case CanChangeRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + // Conservatively assume this can decrement any count. + return true; + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanAlterRefCount(Inst, Arg, PA, Class); + } + } + + case RetainAutoreleaseDep: + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // Don't merge an objc_autorelease with an objc_retain inside a different + // autoreleasepool scope. + return true; + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Nothing else matters for objc_retainAutorelease formation. + return false; + } + + case RetainAutoreleaseRVDep: { + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Anything that can autorelease interrupts + // retainAutoreleaseReturnValue formation. + return CanInterruptRV(Class); + } + } + + case RetainRVDep: + return CanInterruptRV(GetBasicInstructionClass(Inst)); + } + + llvm_unreachable("Invalid dependence flavor"); +} + +/// Walk up the CFG from StartPos (which is in StartBB) and find local and +/// non-local dependencies on Arg. +/// +/// TODO: Cache results? +void +llvm::objcarc::FindDependencies(DependenceKind Flavor, + const Value *Arg, + BasicBlock *StartBB, Instruction *StartInst, + SmallPtrSet &DependingInsts, + SmallPtrSet &Visited, + ProvenanceAnalysis &PA) { + BasicBlock::iterator StartPos = StartInst; + + SmallVector, 4> Worklist; + Worklist.push_back(std::make_pair(StartBB, StartPos)); + do { + std::pair Pair = + Worklist.pop_back_val(); + BasicBlock *LocalStartBB = Pair.first; + BasicBlock::iterator LocalStartPos = Pair.second; + BasicBlock::iterator StartBBBegin = LocalStartBB->begin(); + for (;;) { + if (LocalStartPos == StartBBBegin) { + pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); + if (PI == PE) + // If we've reached the function entry, produce a null dependence. + DependingInsts.insert(0); + else + // Add the predecessors to the worklist. 
+          do {
+            BasicBlock *PredBB = *PI;
+            if (Visited.insert(PredBB))
+              Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+          } while (++PI != PE);
+        break;
+      }
+
+      Instruction *Inst = --LocalStartPos;
+      if (Depends(Flavor, Inst, Arg, PA)) {
+        DependingInsts.insert(Inst);
+        break;
+      }
+    }
+  } while (!Worklist.empty());
+
+  // Determine whether the original StartBB post-dominates all of the blocks we
+  // visited. If not, insert a sentinel indicating that most optimizations are
+  // not safe.
+  for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+       E = Visited.end(); I != E; ++I) {
+    const BasicBlock *BB = *I;
+    if (BB == StartBB)
+      continue;
+    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+      const BasicBlock *Succ = *SI;
+      if (Succ != StartBB && !Visited.count(Succ)) {
+        DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
+        return;
+      }
+    }
+  }
+}
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
new file mode 100644
index 0000000..24d358b
--- /dev/null
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -0,0 +1,79 @@
+//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+  class BasicBlock;
+  class Instruction;
+  class Value;
+}
+
+namespace llvm {
+namespace objcarc {
+
+class ProvenanceAnalysis;
+
+/// \enum DependenceKind
+/// \brief Defines different dependence kinds among various ARC constructs.
+///
+/// There are several kinds of dependence-like concepts in use here.
+///
+enum DependenceKind {
+  NeedsPositiveRetainCount,
+  AutoreleasePoolBoundary,
+  CanChangeRetainCount,
+  RetainAutoreleaseDep,   ///< Blocks objc_retainAutorelease.
+  RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+  RetainRVDep             ///< Blocks objc_retainAutoreleasedReturnValue.
+};
+
+void FindDependencies(DependenceKind Flavor,
+                      const Value *Arg,
+                      BasicBlock *StartBB, Instruction *StartInst,
+                      SmallPtrSet<Instruction *, 4> &DependingInstructions,
+                      SmallPtrSet<const BasicBlock *, 4> &Visited,
+                      ProvenanceAnalysis &PA);
+
+bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+        ProvenanceAnalysis &PA);
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+       InstructionClass Class);
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+                 ProvenanceAnalysis &PA, InstructionClass Class);
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index 854de74..32ca30e 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -29,10 +29,12 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/ObjCARC.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 namespace llvm {
 namespace objcarc {
@@ -163,6 +165,24 @@ static inline bool IsNoThrow(InstructionClass Class) {
          Class == IC_AutoreleasepoolPop;
 }
 
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+static inline bool
+CanInterruptRV(InstructionClass Class) {
+  switch (Class) {
+  case IC_AutoreleasepoolPop:
+  case IC_CallOrUser:
+  case IC_Call:
+  case IC_Autorelease:
+  case IC_AutoreleaseRV:
+  case IC_FusedRetainAutorelease:
+  case IC_FusedRetainAutoreleaseRV:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// \brief Determine if F is one of the special known Functions. If it isn't,
 /// return IC_CallOrUser.
 InstructionClass GetFunctionClass(const Function *F);
@@ -184,6 +204,8 @@ static inline InstructionClass GetBasicInstructionClass(const Value *V) {
   return isa<CallInst>(V) ? IC_CallOrUser : IC_User;
 }
 
+/// \brief Determine what kind of construct V is.
+InstructionClass GetInstructionClass(const Value *V);
 
 /// \brief This is a wrapper around getUnderlyingObject which also knows how to
 /// look through objc_retain and objc_autorelease calls, which we know to return
@@ -225,6 +247,137 @@ static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
   return V;
 }
 
+/// \brief Assuming the given instruction is one of the special calls such as
+/// objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static inline Value *GetObjCArg(Value *Inst) {
+  return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+static inline bool isNullOrUndef(const Value *V) {
+  return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static inline bool isNoopInstruction(const Instruction *I) {
+  return isa<BitCastInst>(I) ||
+         (isa<GetElementPtrInst>(I) &&
+          cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+
+/// \brief Erase the given instruction.
+///
+/// Many ObjC calls return their argument verbatim,
+/// so if it's such a call and the return value has users, replace them with the
+/// argument value.
+///
+static inline void EraseInstruction(Instruction *CI) {
+  Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+  bool Unused = CI->use_empty();
+
+  if (!Unused) {
+    // Replace the return value with the argument.
+    assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+           "Can't delete non-forwarding instruction with users!");
+    CI->replaceAllUsesWith(OldArg);
+  }
+
+  CI->eraseFromParent();
+
+  if (Unused)
+    RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// \brief Test whether the given value is possibly a retainable object pointer.
+static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
+  // Pointers to static or stack storage are not valid retainable object
+  // pointers.
+  if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+    return false;
+  // Special arguments cannot be a valid retainable object pointer.
+  if (const Argument *Arg = dyn_cast<Argument>(Op))
+    if (Arg->hasByValAttr() ||
+        Arg->hasNestAttr() ||
+        Arg->hasStructRetAttr())
+      return false;
+  // Only consider values with pointer types.
+  //
+  // It seems intuitive to exclude function pointer types as well, since
+  // functions are never retainable object pointers; however, clang occasionally
+  // bitcasts retainable object pointers to function-pointer type temporarily.
+  PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+  if (!Ty)
+    return false;
+  // Conservatively assume anything else is a potential retainable object
+  // pointer.
+  return true;
+}
+
+static inline bool IsPotentialRetainableObjPtr(const Value *Op,
+                                               AliasAnalysis &AA) {
+  // First make the rudimentary check.
+  if (!IsPotentialRetainableObjPtr(Op))
+    return false;
+
+  // Objects in constant memory are not reference-counted.
+  if (AA.pointsToConstantMemory(Op))
+    return false;
+
+  // Pointers in constant memory are not pointing to reference-counted objects.
+  if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+    if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+      return false;
+
+  // Otherwise assume the worst.
+  return true;
+}
+
+/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// is.
+static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+  for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I)
+    if (IsPotentialRetainableObjPtr(*I))
+      return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+  return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// \brief Return true if this value refers to a distinct and identifiable
+/// object.
+///
+/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
+/// special knowledge of ObjC conventions.
+static inline bool IsObjCIdentifiedObject(const Value *V) {
+  // Assume that call results and arguments have their own "provenance".
+  // Constants (including GlobalVariables) and Allocas are never
+  // reference-counted.
+  if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+      isa<Argument>(V) || isa<Constant>(V) ||
+      isa<AllocaInst>(V))
+    return true;
+
+  if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+    const Value *Pointer =
+      StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+      // A constant pointer can't be pointing to an object on the heap. It may
+      // be reference-counted, but it won't be deleted.
+      if (GV->isConstant())
+        return true;
+      StringRef Name = GV->getName();
+      // These special variables are known to hold values which are not
+      // reference-counted pointers.
+      if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+          Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+          Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+          Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+          Name.startswith("\01l_objc_msgSend_fixup_"))
+        return true;
+    }
+  }
+
+  return false;
+}
 
 } // end namespace objcarc
 } // end namespace llvm
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
new file mode 100644
index 0000000..704ac92
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -0,0 +1,537 @@
+//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines late ObjC ARC optimizations.
ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +// TODO: ObjCARCContract could insert PHI nodes when uses aren't +// dominated by single calls. + +#define DEBUG_TYPE "objc-arc-contract" +#include "ObjCARC.h" +#include "ProvenanceAnalysis.h" +#include "DependencyAnalysis.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Operator.h" + +using namespace llvm; +using namespace llvm::objcarc; + +STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); + +namespace { + /// \brief Late ARC optimizations + /// + /// These change the IR in a way that makes it difficult to be analyzed by + /// ObjCARCOpt, so it's run late. + class ObjCARCContract : public FunctionPass { + bool Changed; + AliasAnalysis *AA; + DominatorTree *DT; + ProvenanceAnalysis PA; + + /// A flag indicating whether this optimization pass should run. + bool Run; + + /// Declarations for ObjC runtime functions, for use in creating calls to + /// them. These are initialized lazily to avoid cluttering up the Module + /// with unused declarations. + + /// Declaration for objc_storeStrong(). + Constant *StoreStrongCallee; + /// Declaration for objc_retainAutorelease(). + Constant *RetainAutoreleaseCallee; + /// Declaration for objc_retainAutoreleaseReturnValue(). + Constant *RetainAutoreleaseRVCallee; + + /// The inline asm string to insert between calls and RetainRV calls to make + /// the optimization work on targets which need it. + const MDString *RetainRVMarker; + + /// The set of inserted objc_storeStrong calls. If at the end of walking the + /// function we have found no alloca instructions, these calls can be marked + /// "tail". 
+ SmallPtrSet StoreStrongCalls; + + Constant *getStoreStrongCallee(Module *M); + Constant *getRetainAutoreleaseCallee(Module *M); + Constant *getRetainAutoreleaseRVCallee(Module *M); + + bool ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet + &DependingInstructions, + SmallPtrSet + &Visited); + + void ContractRelease(Instruction *Release, + inst_iterator &Iter); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + public: + static char ID; + ObjCARCContract() : FunctionPass(ID) { + initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCContract::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) + +Pass *llvm::createObjCARCContractPass() { + return new ObjCARCContract(); +} + +void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); +} + +Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { + if (!StoreStrongCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = { I8XX, I8X }; + + AttributeSet Attr = AttributeSet() + .addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind) + .addAttribute(M->getContext(), 1, Attribute::NoCapture); + + StoreStrongCallee = + M->getOrInsertFunction( + "objc_storeStrong", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attr); + } + return StoreStrongCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { + if (!RetainAutoreleaseCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainAutoreleaseCallee = + M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); + } + return RetainAutoreleaseCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { + if (!RetainAutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainAutoreleaseRVCallee = + M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, + Attribute); + } + return RetainAutoreleaseRVCallee; +} + +/// Merge an autorelease with a retain into a fused call. +bool +ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet + &DependingInstructions, + SmallPtrSet + &Visited) { + const Value *Arg = GetObjCArg(Autorelease); + + // Check that there are no instructions between the retain and the autorelease + // (such as an autorelease_pop) which may change the count. 
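+  //
+  // For illustration, the pattern being guarded against looks roughly like
+  // this (a hand-written IR sketch, not output of this pass):
+  //
+  //   %0 = call i8* @objc_retain(i8* %p)
+  //   call void @objc_autoreleasePoolPop(i8* %pool)
+  //   %1 = call i8* @objc_autorelease(i8* %0)
+  //
+  // The pool pop may release %0, so fusing the retain and the autorelease
+  // into a single objc_retainAutorelease call here would not be
+  // behavior-preserving.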
+ CallInst *Retain = 0; + if (Class == IC_AutoreleaseRV) + FindDependencies(RetainAutoreleaseRVDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + else + FindDependencies(RetainAutoreleaseDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + + Visited.clear(); + if (DependingInstructions.size() != 1) { + DependingInstructions.clear(); + return false; + } + + Retain = dyn_cast_or_null(*DependingInstructions.begin()); + DependingInstructions.clear(); + + if (!Retain || + GetBasicInstructionClass(Retain) != IC_Retain || + GetObjCArg(Retain) != Arg) + return false; + + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing " + "retain/autorelease. Erasing: " << *Autorelease << "\n" + " Old Retain: " + << *Retain << "\n"); + + if (Class == IC_AutoreleaseRV) + Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); + else + Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); + + DEBUG(dbgs() << " New Retain: " + << *Retain << "\n"); + + EraseInstruction(Autorelease); + return true; +} + +/// Attempt to merge an objc_release with a store, load, and objc_retain to form +/// an objc_storeStrong. This can be a little tricky because the instructions +/// don't always appear in order, and there may be unrelated intervening +/// instructions. +void ObjCARCContract::ContractRelease(Instruction *Release, + inst_iterator &Iter) { + LoadInst *Load = dyn_cast(GetObjCArg(Release)); + if (!Load || !Load->isSimple()) return; + + // For now, require everything to be in one basic block. + BasicBlock *BB = Release->getParent(); + if (Load->getParent() != BB) return; + + // Walk down to find the store and the release, which may be in either order. + BasicBlock::iterator I = Load, End = BB->end(); + ++I; + AliasAnalysis::Location Loc = AA->getLocation(Load); + StoreInst *Store = 0; + bool SawRelease = false; + for (; !Store || !SawRelease; ++I) { + if (I == End) + return; + + Instruction *Inst = I; + if (Inst == Release) { + SawRelease = true; + continue; + } + + InstructionClass Class = GetBasicInstructionClass(Inst); + + // Unrelated retains are harmless. + if (IsRetain(Class)) + continue; + + if (Store) { + // The store is the point where we're going to put the objc_storeStrong, + // so make sure there are no uses after it. + if (CanUse(Inst, Load, PA, Class)) + return; + } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { + // We are moving the load down to the store, so check for anything + // else which writes to the memory between the load and the store. + Store = dyn_cast(Inst); + if (!Store || !Store->isSimple()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + } + } + + Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); + + // Walk up to find the retain. 
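+  // At this point the load, the store, and the release have all been found;
+  // only the retain of the stored value remains to be located. The complete
+  // pattern being contracted is roughly (a hand-written IR sketch, not
+  // output of this pass):
+  //
+  //   %old = load i8** %addr
+  //   %new = call i8* @objc_retain(i8* %x)
+  //   store i8* %new, i8** %addr
+  //   call void @objc_release(i8* %old)
+  //
+  // which becomes a single call:
+  //
+  //   call void @objc_storeStrong(i8** %addr, i8* %x)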
+ I = Store; + BasicBlock::iterator Begin = BB->begin(); + while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) + --I; + Instruction *Retain = I; + if (GetBasicInstructionClass(Retain) != IC_Retain) return; + if (GetObjCArg(Retain) != New) return; + + Changed = true; + ++NumStoreStrongs; + + LLVMContext &C = Release->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + + Value *Args[] = { Load->getPointerOperand(), New }; + if (Args[0]->getType() != I8XX) + Args[0] = new BitCastInst(Args[0], I8XX, "", Store); + if (Args[1]->getType() != I8X) + Args[1] = new BitCastInst(Args[1], I8X, "", Store); + CallInst *StoreStrong = + CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), + Args, "", Store); + StoreStrong->setDoesNotThrow(); + StoreStrong->setDebugLoc(Store->getDebugLoc()); + + // We can't set the tail flag yet, because we haven't yet determined + // whether there are any escaping allocas. Remember this call, so that + // we can set the tail flag once we know it's safe. + StoreStrongCalls.insert(StoreStrong); + + if (&*Iter == Store) ++Iter; + Store->eraseFromParent(); + Release->eraseFromParent(); + EraseInstruction(Retain); + if (Load->use_empty()) + Load->eraseFromParent(); +} + +bool ObjCARCContract::doInitialization(Module &M) { + // If nothing in the Module uses ARC, don't do anything. + Run = ModuleHasARC(M); + if (!Run) + return false; + + // These are initialized lazily. + StoreStrongCallee = 0; + RetainAutoreleaseCallee = 0; + RetainAutoreleaseRVCallee = 0; + + // Initialize RetainRVMarker. + RetainRVMarker = 0; + if (NamedMDNode *NMD = + M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) + if (NMD->getNumOperands() == 1) { + const MDNode *N = NMD->getOperand(0); + if (N->getNumOperands() == 1) + if (const MDString *S = dyn_cast(N->getOperand(0))) + RetainRVMarker = S; + } + + return false; +} + +bool ObjCARCContract::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + AA = &getAnalysis(); + DT = &getAnalysis(); + + PA.setAA(&getAnalysis()); + + // Track whether it's ok to mark objc_storeStrong calls with the "tail" + // keyword. Be conservative if the function has variadic arguments. + // It seems that functions which "return twice" are also unsafe for the + // "tail" argument, because they are setjmp, which could need to + // return to an earlier stack state. + bool TailOkForStoreStrongs = !F.isVarArg() && + !F.callsFunctionThatReturnsTwice(); + + // For ObjC library calls which return their argument, replace uses of the + // argument with uses of the call return value, if it dominates the use. This + // reduces register pressure. + SmallPtrSet DependingInstructions; + SmallPtrSet Visited; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); + + // Only these library routines return their argument. In particular, + // objc_retainBlock does not necessarily return its argument. 
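+    // For example (a hand-written sketch, where @use stands for an arbitrary
+    // consumer of the pointer):
+    //
+    //   %1 = call i8* @objc_retain(i8* %0)
+    //   call void @use(i8* %0)
+    //
+    // Since objc_retain returns its argument verbatim, the use of %0 can be
+    // rewritten to use %1 instead, shortening the live range of %0 and
+    // reducing register pressure.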
+ InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + break; + case IC_Autorelease: + case IC_AutoreleaseRV: + if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) + continue; + break; + case IC_RetainRV: { + // If we're compiling for a target which needs a special inline-asm + // marker to do the retainAutoreleasedReturnValue optimization, + // insert it now. + if (!RetainRVMarker) + break; + BasicBlock::iterator BBI = Inst; + BasicBlock *InstParent = Inst->getParent(); + + // Step up to see if the call immediately precedes the RetainRV call. + // If it's an invoke, we have to cross a block boundary. And we have + // to carefully dodge no-op instructions. + do { + if (&*BBI == InstParent->begin()) { + BasicBlock *Pred = InstParent->getSinglePredecessor(); + if (!Pred) + goto decline_rv_optimization; + BBI = Pred->getTerminator(); + break; + } + --BBI; + } while (isNoopInstruction(BBI)); + + if (&*BBI == GetObjCArg(Inst)) { + DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " + "retainAutoreleasedReturnValue optimization.\n"); + Changed = true; + InlineAsm *IA = + InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), + /*isVarArg=*/false), + RetainRVMarker->getString(), + /*Constraints=*/"", /*hasSideEffects=*/true); + CallInst::Create(IA, "", Inst); + } + decline_rv_optimization: + break; + } + case IC_InitWeak: { + // objc_initWeak(p, null) => *p = null + CallInst *CI = cast(Inst); + if (isNullOrUndef(CI->getArgOperand(1))) { + Value *Null = + ConstantPointerNull::get(cast(CI->getType())); + Changed = true; + new StoreInst(Null, CI->getArgOperand(0), CI); + + DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" + << " New = " << *Null << "\n"); + + CI->replaceAllUsesWith(Null); + CI->eraseFromParent(); + } + continue; + } + case IC_Release: + ContractRelease(Inst, I); + continue; + case IC_User: + // Be conservative if the function has any alloca instructions. + // Technically we only care about escaping alloca instructions, + // but this is sufficient to handle some interesting cases. + if (isa(Inst)) + TailOkForStoreStrongs = false; + continue; + default: + continue; + } + + DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); + + // Don't use GetObjCArg because we don't want to look through bitcasts + // and such; to do the replacement, the argument must have type i8*. + const Value *Arg = cast(Inst)->getArgOperand(0); + for (;;) { + // If we're compiling bugpointed code, don't get in trouble. + if (!isa(Arg) && !isa(Arg)) + break; + // Look through the uses of the pointer. + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ) { + Use &U = UI.getUse(); + unsigned OperandNo = UI.getOperandNo(); + ++UI; // Increment UI now, because we may unlink its element. + + // If the call's return value dominates a use of the call's argument + // value, rewrite the use to use the return value. We check for + // reachability here because an unreachable call is considered to + // trivially dominate itself, which would lead us to rewriting its + // argument in terms of its return value, which would lead to + // infinite loops in GetObjCArg. 
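+        // (A sketch of the degenerate case being avoided: in unreachable
+        // code,
+        //
+        //   %x = call i8* @objc_retain(i8* %x)
+        //
+        // is valid IR, and the call trivially dominates its own argument
+        // use; rewriting that use in terms of the return value would make
+        // GetObjCArg chase the self-reference forever.)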
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { + Changed = true; + Instruction *Replacement = Inst; + Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast(U.getUser())) { + // For PHI nodes, insert the bitcast in the predecessor block. + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + // While we're here, rewrite all edges for this PHI, rather + // than just one use at a time, to minimize the number of + // bitcasts we emit. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + cast(U.getUser())); + U.set(Replacement); + } + } + } + + // If Arg is a no-op casted pointer, strip one level of casts and iterate. + if (const BitCastInst *BI = dyn_cast(Arg)) + Arg = BI->getOperand(0); + else if (isa(Arg) && + cast(Arg)->hasAllZeroIndices()) + Arg = cast(Arg)->getPointerOperand(); + else if (isa(Arg) && + !cast(Arg)->mayBeOverridden()) + Arg = cast(Arg)->getAliasee(); + else + break; + } + } + + // If this function has no escaping allocas or suspicious vararg usage, + // objc_storeStrong calls can be marked with the "tail" keyword. + if (TailOkForStoreStrongs) + for (SmallPtrSet::iterator I = StoreStrongCalls.begin(), + E = StoreStrongCalls.end(); I != E; ++I) + (*I)->setTailCall(); + StoreStrongCalls.clear(); + + return Changed; +} + +/// @} +/// diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 894f4ee..370c7f4 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -31,10 +31,13 @@ #define DEBUG_TYPE "objc-arc-opts" #include "ObjCARC.h" #include "ObjCARCAliasAnalysis.h" +#include "ProvenanceAnalysis.h" +#include "DependencyAnalysis.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/CFG.h" using namespace llvm; using namespace llvm::objcarc; @@ -132,202 +135,6 @@ namespace { /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. /// @{ -#include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Transforms/Utils/Local.h" - -/// \brief Test whether the given value is possible a retainable object pointer. -static bool IsPotentialRetainableObjPtr(const Value *Op) { - // Pointers to static or stack storage are not valid retainable object pointers. - if (isa(Op) || isa(Op)) - return false; - // Special arguments can not be a valid retainable object pointer. - if (const Argument *Arg = dyn_cast(Op)) - if (Arg->hasByValAttr() || - Arg->hasNestAttr() || - Arg->hasStructRetAttr()) - return false; - // Only consider values with pointer types. - // - // It seemes intuitive to exclude function pointer types as well, since - // functions are never retainable object pointers, however clang occasionally - // bitcasts retainable object pointers to function-pointer type temporarily. 
- PointerType *Ty = dyn_cast(Op->getType()); - if (!Ty) - return false; - // Conservatively assume anything else is a potential retainable object pointer. - return true; -} - -/// \brief Helper for GetInstructionClass. Determines what kind of construct CS -/// is. -static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (IsPotentialRetainableObjPtr(*I)) - return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; - - return CS.onlyReadsMemory() ? IC_None : IC_Call; -} - -/// \brief Determine what kind of construct V is. -static InstructionClass GetInstructionClass(const Value *V) { - if (const Instruction *I = dyn_cast(V)) { - // Any instruction other than bitcast and gep with a pointer operand have a - // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer - // to a subsequent use, rather than using it themselves, in this sense. - // As a short cut, several other opcodes are known to have no pointer - // operands of interest. And ret is never followed by a release, so it's - // not interesting to examine. - switch (I->getOpcode()) { - case Instruction::Call: { - const CallInst *CI = cast(I); - // Check for calls to special functions. - if (const Function *F = CI->getCalledFunction()) { - InstructionClass Class = GetFunctionClass(F); - if (Class != IC_CallOrUser) - return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. - switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: case Intrinsic::frameaddress: - case Intrinsic::stacksave: case Intrinsic::stackrestore: - case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: - case Intrinsic::objectsize: case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. 
- return IC_None; - default: - break; - } - } - return GetCallSiteClass(CI); - } - case Instruction::Invoke: - return GetCallSiteClass(cast(I)); - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::Select: case Instruction::PHI: - case Instruction::Ret: case Instruction::Br: - case Instruction::Switch: case Instruction::IndirectBr: - case Instruction::Alloca: case Instruction::VAArg: - case Instruction::Add: case Instruction::FAdd: - case Instruction::Sub: case Instruction::FSub: - case Instruction::Mul: case Instruction::FMul: - case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: - case Instruction::SRem: case Instruction::URem: case Instruction::FRem: - case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: - case Instruction::IntToPtr: case Instruction::FCmp: - case Instruction::FPTrunc: case Instruction::FPExt: - case Instruction::FPToUI: case Instruction::FPToSI: - case Instruction::UIToFP: case Instruction::SIToFP: - case Instruction::InsertElement: case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::ExtractValue: - break; - case Instruction::ICmp: - // Comparing a pointer with null, or any other constant, isn't an - // interesting use, because we don't care what the pointer points to, or - // about the values of any other dynamic reference-counted pointers. - if (IsPotentialRetainableObjPtr(I->getOperand(1))) - return IC_User; - break; - default: - // For anything else, check all the operands. - // Note that this includes both operands of a Store: while the first - // operand isn't actually being dereferenced, it is being stored to - // memory where we can no longer track who might read it and dereference - // it, so we have to consider it potentially used. - for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); - OI != OE; ++OI) - if (IsPotentialRetainableObjPtr(*OI)) - return IC_User; - } - } - - // Otherwise, it's totally inert for ARC purposes. - return IC_None; -} - -/// \brief Erase the given instruction. -/// -/// Many ObjC calls return their argument verbatim, -/// so if it's such a call and the return value has users, replace them with the -/// argument value. -/// -static void EraseInstruction(Instruction *CI) { - Value *OldArg = cast(CI)->getArgOperand(0); - - bool Unused = CI->use_empty(); - - if (!Unused) { - // Replace the return value with the argument. - assert(IsForwarding(GetBasicInstructionClass(CI)) && - "Can't delete non-forwarding instruction with users!"); - CI->replaceAllUsesWith(OldArg); - } - - CI->eraseFromParent(); - - if (Unused) - RecursivelyDeleteTriviallyDeadInstructions(OldArg); -} - -/// \brief Assuming the given instruction is one of the special calls such as -/// objc_retain or objc_release, return the argument value, stripped of no-op -/// casts and forwarding calls. -static Value *GetObjCArg(Value *Inst) { - return StripPointerCastsAndObjCCalls(cast(Inst)->getArgOperand(0)); -} - -/// \brief Return true if this value refers to a distinct and identifiable -/// object. -/// -/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses -/// special knowledge of ObjC conventions. -static bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". 
- // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa(V) || isa(V) || - isa(V) || isa(V) || - isa(V)) - return true; - - if (const LoadInst *LI = dyn_cast(V)) { - const Value *Pointer = - StripPointerCastsAndObjCCalls(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. - if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || - Name.startswith("\01L_OBJC_METH_VAR_NAME_") || - Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - } - } - - return false; -} - /// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon /// as it finds a value with multiple uses. static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { @@ -487,7 +294,6 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Support/CFG.h" STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); @@ -498,187 +304,6 @@ STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); namespace { - /// \brief This is similar to BasicAliasAnalysis, and it uses many of the same - /// techniques, except it uses special ObjC-specific reasoning about pointer - /// relationships. - /// - /// In this context ``Provenance'' is defined as the history of an object's - /// ownership. Thus ``Provenance Analysis'' is defined by using the notion of - /// an ``independent provenance source'' of a pointer to determine whether or - /// not two pointers have the same provenance source and thus could - /// potentially be related. - class ProvenanceAnalysis { - AliasAnalysis *AA; - - typedef std::pair ValuePairTy; - typedef DenseMap CachedResultsTy; - CachedResultsTy CachedResults; - - bool relatedCheck(const Value *A, const Value *B); - bool relatedSelect(const SelectInst *A, const Value *B); - bool relatedPHI(const PHINode *A, const Value *B); - - void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - - public: - ProvenanceAnalysis() {} - - void setAA(AliasAnalysis *aa) { AA = aa; } - - AliasAnalysis *getAA() const { return AA; } - - bool related(const Value *A, const Value *B); - - void clear() { - CachedResults.clear(); - } - }; -} - -bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { - // If the values are Selects with the same condition, we can do a more precise - // check: just check for relations between the values on corresponding arms. - if (const SelectInst *SB = dyn_cast(B)) - if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); - - // Check both arms of the Select node individually. 
- return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); -} - -bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { - // If the values are PHIs in the same block, we can do a more precise as well - // as efficient check: just check for relations between the values on - // corresponding edges. - if (const PHINode *PNB = dyn_cast(B)) - if (PNB->getParent() == A->getParent()) { - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) - if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) - return true; - return false; - } - - // Check each unique source of the PHI node against B. - SmallPtrSet UniqueSrc; - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { - const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1) && related(PV1, B)) - return true; - } - - // All of the arms checked out. - return false; -} - -/// Test if the value of P, or any value covered by its provenance, is ever -/// stored within the function (not counting callees). -static bool isStoredObjCPointer(const Value *P) { - SmallPtrSet Visited; - SmallVector Worklist; - Worklist.push_back(P); - Visited.insert(P); - do { - P = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end(); - UI != UE; ++UI) { - const User *Ur = *UI; - if (isa(Ur)) { - if (UI.getOperandNo() == 0) - // The pointer is stored. - return true; - // The pointed is stored through. - continue; - } - if (isa(Ur)) - // The pointer is passed as an argument, ignore this. - continue; - if (isa(P)) - // Assume the worst. - return true; - if (Visited.insert(Ur)) - Worklist.push_back(Ur); - } - } while (!Worklist.empty()); - - // Everything checked out. - return false; -} - -bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { - // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); - - // Quick check. - if (A == B) - return true; - - // Ask regular AliasAnalysis, for a first approximation. - switch (AA->alias(A, B)) { - case AliasAnalysis::NoAlias: - return false; - case AliasAnalysis::MustAlias: - case AliasAnalysis::PartialAlias: - return true; - case AliasAnalysis::MayAlias: - break; - } - - bool AIsIdentified = IsObjCIdentifiedObject(A); - bool BIsIdentified = IsObjCIdentifiedObject(B); - - // An ObjC-Identified object can't alias a load if it is never locally stored. - if (AIsIdentified) { - // Check for an obvious escape. - if (isa(B)) - return isStoredObjCPointer(A); - if (BIsIdentified) { - // Check for an obvious escape. - if (isa(A)) - return isStoredObjCPointer(B); - // Both pointers are identified and escapes aren't an evident problem. - return false; - } - } else if (BIsIdentified) { - // Check for an obvious escape. - if (isa(A)) - return isStoredObjCPointer(B); - } - - // Special handling for PHI and Select. - if (const PHINode *PN = dyn_cast(A)) - return relatedPHI(PN, B); - if (const PHINode *PN = dyn_cast(B)) - return relatedPHI(PN, A); - if (const SelectInst *S = dyn_cast(A)) - return relatedSelect(S, B); - if (const SelectInst *S = dyn_cast(B)) - return relatedSelect(S, A); - - // Conservative. - return true; -} - -bool ProvenanceAnalysis::related(const Value *A, const Value *B) { - // Begin by inserting a conservative value into the map. If the insertion - // fails, we have the answer already. If it succeeds, leave it there until we - // compute the real answer to guard against recursive queries. 
- if (A > B) std::swap(A, B); - std::pair Pair = - CachedResults.insert(std::make_pair(ValuePairTy(A, B), true)); - if (!Pair.second) - return Pair.first->second; - - bool Result = relatedCheck(A, B); - CachedResults[ValuePairTy(A, B)] = Result; - return Result; -} - -namespace { /// \enum Sequence /// /// \brief A sequence of states that a pointer may go through in which an @@ -1300,300 +925,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } -/// Test whether the given value is possible a reference-counted pointer, -/// including tests which utilize AliasAnalysis. -static bool IsPotentialRetainableObjPtr(const Value *Op, AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialRetainableObjPtr(Op)) - return false; - - // Objects in constant memory are not reference-counted. - if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// Test whether the given instruction can result in a reference count -/// modification (positive or negative) for the pointer's object. -static bool -CanAlterRefCount(const Instruction *Inst, const Value *Ptr, - ProvenanceAnalysis &PA, InstructionClass Class) { - switch (Class) { - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_User: - // These operations never directly modify a reference count. - return false; - default: break; - } - - ImmutableCallSite CS = static_cast(Inst); - assert(CS && "Only calls can alter reference counts!"); - - // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); - if (AliasAnalysis::onlyReadsMemory(MRB)) - return false; - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) { - const Value *Op = *I; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } - - // Assume the worst. - return true; -} - -/// Test whether the given instruction can "use" the given pointer's object in a -/// way that requires the reference count to be positive. -static bool -CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, - InstructionClass Class) { - // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. - if (Class == IC_Call) - return false; - - // Consider various instructions which may have pointer arguments which are - // not "uses". - if (const ICmpInst *ICI = dyn_cast(Inst)) { - // Comparing a pointer with null, or any other constant, isn't really a use, - // because we don't care what the pointer points to, or about the values - // of any other dynamic reference-counted pointers. - if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) - return false; - } else if (ImmutableCallSite CS = static_cast(Inst)) { - // For calls, just check the arguments (and not the callee operand). - for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), - OE = CS.arg_end(); OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } else if (const StoreInst *SI = dyn_cast(Inst)) { - // Special-case stores, because we don't care about the stored value, just - // the store address. 
- const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); - // If we can't tell what the underlying object was, assume there is a - // dependence. - return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); - } - - // Check each operand for a match. - for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); - OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; -} - -/// Test whether the given instruction can autorelease any pointer or cause an -/// autoreleasepool pop. -static bool -CanInterruptRV(InstructionClass Class) { - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_CallOrUser: - case IC_Call: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - return true; - default: - return false; - } -} - -namespace { - /// \enum DependenceKind - /// \brief Defines different dependence kinds among various ARC constructs. - /// - /// There are several kinds of dependence-like concepts in use here. - /// - enum DependenceKind { - NeedsPositiveRetainCount, - AutoreleasePoolBoundary, - CanChangeRetainCount, - RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. - RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. - RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. - }; -} - -/// Test if there can be dependencies on Inst through Arg. This function only -/// tests dependencies relevant for removing pairs of calls. -static bool -Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, - ProvenanceAnalysis &PA) { - // If we've reached the definition of Arg, stop. - if (Inst == Arg) - return true; - - switch (Flavor) { - case NeedsPositiveRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanUse(Inst, Arg, PA, Class); - } - } - - case AutoreleasePoolBoundary: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // These mark the end and begin of an autorelease pool scope. - return true; - default: - // Nothing else does this. - return false; - } - } - - case CanChangeRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - // Conservatively assume this can decrement any count. - return true; - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanAlterRefCount(Inst, Arg, PA, Class); - } - } - - case RetainAutoreleaseDep: - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // Don't merge an objc_autorelease with an objc_retain inside a different - // autoreleasepool scope. - return true; - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Nothing else matters for objc_retainAutorelease formation. - return false; - } - - case RetainAutoreleaseRVDep: { - InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Anything that can autorelease interrupts - // retainAutoreleaseReturnValue formation. 
- return CanInterruptRV(Class); - } - } - - case RetainRVDep: - return CanInterruptRV(GetBasicInstructionClass(Inst)); - } - - llvm_unreachable("Invalid dependence flavor"); -} - -/// Walk up the CFG from StartPos (which is in StartBB) and find local and -/// non-local dependencies on Arg. -/// -/// TODO: Cache results? -static void -FindDependencies(DependenceKind Flavor, - const Value *Arg, - BasicBlock *StartBB, Instruction *StartInst, - SmallPtrSet &DependingInstructions, - SmallPtrSet &Visited, - ProvenanceAnalysis &PA) { - BasicBlock::iterator StartPos = StartInst; - - SmallVector, 4> Worklist; - Worklist.push_back(std::make_pair(StartBB, StartPos)); - do { - std::pair Pair = - Worklist.pop_back_val(); - BasicBlock *LocalStartBB = Pair.first; - BasicBlock::iterator LocalStartPos = Pair.second; - BasicBlock::iterator StartBBBegin = LocalStartBB->begin(); - for (;;) { - if (LocalStartPos == StartBBBegin) { - pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); - if (PI == PE) - // If we've reached the function entry, produce a null dependence. - DependingInstructions.insert(0); - else - // Add the predecessors to the worklist. - do { - BasicBlock *PredBB = *PI; - if (Visited.insert(PredBB)) - Worklist.push_back(std::make_pair(PredBB, PredBB->end())); - } while (++PI != PE); - break; - } - - Instruction *Inst = --LocalStartPos; - if (Depends(Flavor, Inst, Arg, PA)) { - DependingInstructions.insert(Inst); - break; - } - } - } while (!Worklist.empty()); - - // Determine whether the original StartBB post-dominates all of the blocks we - // visited. If not, insert a sentinal indicating that most optimizations are - // not safe. - for (SmallPtrSet::const_iterator I = Visited.begin(), - E = Visited.end(); I != E; ++I) { - const BasicBlock *BB = *I; - if (BB == StartBB) - continue; - const TerminatorInst *TI = cast(&BB->back()); - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { - const BasicBlock *Succ = *SI; - if (Succ != StartBB && !Visited.count(Succ)) { - DependingInstructions.insert(reinterpret_cast(-1)); - return; - } - } - } -} - -static bool isNullOrUndef(const Value *V) { - return isa(V) || isa(V); -} - -static bool isNoopInstruction(const Instruction *I) { - return isa(I) || - (isa(I) && - cast(I)->hasAllZeroIndices()); -} - /// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a /// return value. void @@ -3337,511 +2668,3 @@ void ObjCARCOpt::releaseMemory() { /// @} /// -/// \defgroup ARCContract ARC Contraction. -/// @{ - -// TODO: ObjCARCContract could insert PHI nodes when uses aren't -// dominated by single calls. - -#include "llvm/Analysis/Dominators.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Operator.h" - -STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); - -namespace { - /// \brief Late ARC optimizations - /// - /// These change the IR in a way that makes it difficult to be analyzed by - /// ObjCARCOpt, so it's run late. - class ObjCARCContract : public FunctionPass { - bool Changed; - AliasAnalysis *AA; - DominatorTree *DT; - ProvenanceAnalysis PA; - - /// A flag indicating whether this optimization pass should run. - bool Run; - - /// Declarations for ObjC runtime functions, for use in creating calls to - /// them. These are initialized lazily to avoid cluttering up the Module - /// with unused declarations. - - /// Declaration for objc_storeStrong(). - Constant *StoreStrongCallee; - /// Declaration for objc_retainAutorelease(). 
- Constant *RetainAutoreleaseCallee; - /// Declaration for objc_retainAutoreleaseReturnValue(). - Constant *RetainAutoreleaseRVCallee; - - /// The inline asm string to insert between calls and RetainRV calls to make - /// the optimization work on targets which need it. - const MDString *RetainRVMarker; - - /// The set of inserted objc_storeStrong calls. If at the end of walking the - /// function we have found no alloca instructions, these calls can be marked - /// "tail". - SmallPtrSet StoreStrongCalls; - - Constant *getStoreStrongCallee(Module *M); - Constant *getRetainAutoreleaseCallee(Module *M); - Constant *getRetainAutoreleaseRVCallee(Module *M); - - bool ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet - &DependingInstructions, - SmallPtrSet - &Visited); - - void ContractRelease(Instruction *Release, - inst_iterator &Iter); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - public: - static char ID; - ObjCARCContract() : FunctionPass(ID) { - initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCContract::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_END(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) - -Pass *llvm::createObjCARCContractPass() { - return new ObjCARCContract(); -} - -void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.setPreservesCFG(); -} - -Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { - if (!StoreStrongCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = { I8XX, I8X }; - - AttributeSet Attr = AttributeSet() - .addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind) - .addAttribute(M->getContext(), 1, Attribute::NoCapture); - - StoreStrongCallee = - M->getOrInsertFunction( - "objc_storeStrong", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attr); - } - return StoreStrongCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { - if (!RetainAutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainAutoreleaseCallee = - M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); - } - return RetainAutoreleaseCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { - if (!RetainAutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainAutoreleaseRVCallee = - M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, - Attribute); - } - return RetainAutoreleaseRVCallee; -} - -/// Merge an autorelease with a retain into a 
fused call. -bool -ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet - &DependingInstructions, - SmallPtrSet - &Visited) { - const Value *Arg = GetObjCArg(Autorelease); - - // Check that there are no instructions between the retain and the autorelease - // (such as an autorelease_pop) which may change the count. - CallInst *Retain = 0; - if (Class == IC_AutoreleaseRV) - FindDependencies(RetainAutoreleaseRVDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - else - FindDependencies(RetainAutoreleaseDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - - Visited.clear(); - if (DependingInstructions.size() != 1) { - DependingInstructions.clear(); - return false; - } - - Retain = dyn_cast_or_null(*DependingInstructions.begin()); - DependingInstructions.clear(); - - if (!Retain || - GetBasicInstructionClass(Retain) != IC_Retain || - GetObjCArg(Retain) != Arg) - return false; - - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing " - "retain/autorelease. Erasing: " << *Autorelease << "\n" - " Old Retain: " - << *Retain << "\n"); - - if (Class == IC_AutoreleaseRV) - Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); - else - Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); - - DEBUG(dbgs() << " New Retain: " - << *Retain << "\n"); - - EraseInstruction(Autorelease); - return true; -} - -/// Attempt to merge an objc_release with a store, load, and objc_retain to form -/// an objc_storeStrong. This can be a little tricky because the instructions -/// don't always appear in order, and there may be unrelated intervening -/// instructions. -void ObjCARCContract::ContractRelease(Instruction *Release, - inst_iterator &Iter) { - LoadInst *Load = dyn_cast(GetObjCArg(Release)); - if (!Load || !Load->isSimple()) return; - - // For now, require everything to be in one basic block. - BasicBlock *BB = Release->getParent(); - if (Load->getParent() != BB) return; - - // Walk down to find the store and the release, which may be in either order. - BasicBlock::iterator I = Load, End = BB->end(); - ++I; - AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; - bool SawRelease = false; - for (; !Store || !SawRelease; ++I) { - if (I == End) - return; - - Instruction *Inst = I; - if (Inst == Release) { - SawRelease = true; - continue; - } - - InstructionClass Class = GetBasicInstructionClass(Inst); - - // Unrelated retains are harmless. - if (IsRetain(Class)) - continue; - - if (Store) { - // The store is the point where we're going to put the objc_storeStrong, - // so make sure there are no uses after it. - if (CanUse(Inst, Load, PA, Class)) - return; - } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { - // We are moving the load down to the store, so check for anything - // else which writes to the memory between the load and the store. - Store = dyn_cast(Inst); - if (!Store || !Store->isSimple()) return; - if (Store->getPointerOperand() != Loc.Ptr) return; - } - } - - Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); - - // Walk up to find the retain. 
- I = Store; - BasicBlock::iterator Begin = BB->begin(); - while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) - --I; - Instruction *Retain = I; - if (GetBasicInstructionClass(Retain) != IC_Retain) return; - if (GetObjCArg(Retain) != New) return; - - Changed = true; - ++NumStoreStrongs; - - LLVMContext &C = Release->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - - Value *Args[] = { Load->getPointerOperand(), New }; - if (Args[0]->getType() != I8XX) - Args[0] = new BitCastInst(Args[0], I8XX, "", Store); - if (Args[1]->getType() != I8X) - Args[1] = new BitCastInst(Args[1], I8X, "", Store); - CallInst *StoreStrong = - CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), - Args, "", Store); - StoreStrong->setDoesNotThrow(); - StoreStrong->setDebugLoc(Store->getDebugLoc()); - - // We can't set the tail flag yet, because we haven't yet determined - // whether there are any escaping allocas. Remember this call, so that - // we can set the tail flag once we know it's safe. - StoreStrongCalls.insert(StoreStrong); - - if (&*Iter == Store) ++Iter; - Store->eraseFromParent(); - Release->eraseFromParent(); - EraseInstruction(Retain); - if (Load->use_empty()) - Load->eraseFromParent(); -} - -bool ObjCARCContract::doInitialization(Module &M) { - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // These are initialized lazily. - StoreStrongCallee = 0; - RetainAutoreleaseCallee = 0; - RetainAutoreleaseRVCallee = 0; - - // Initialize RetainRVMarker. - RetainRVMarker = 0; - if (NamedMDNode *NMD = - M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) - if (NMD->getNumOperands() == 1) { - const MDNode *N = NMD->getOperand(0); - if (N->getNumOperands() == 1) - if (const MDString *S = dyn_cast(N->getOperand(0))) - RetainRVMarker = S; - } - - return false; -} - -bool ObjCARCContract::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - AA = &getAnalysis(); - DT = &getAnalysis(); - - PA.setAA(&getAnalysis()); - - // Track whether it's ok to mark objc_storeStrong calls with the "tail" - // keyword. Be conservative if the function has variadic arguments. - // It seems that functions which "return twice" are also unsafe for the - // "tail" argument, because they are setjmp, which could need to - // return to an earlier stack state. - bool TailOkForStoreStrongs = !F.isVarArg() && - !F.callsFunctionThatReturnsTwice(); - - // For ObjC library calls which return their argument, replace uses of the - // argument with uses of the call return value, if it dominates the use. This - // reduces register pressure. - SmallPtrSet DependingInstructions; - SmallPtrSet Visited; - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); - - // Only these library routines return their argument. In particular, - // objc_retainBlock does not necessarily return its argument. 
- InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - break; - case IC_Autorelease: - case IC_AutoreleaseRV: - if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) - continue; - break; - case IC_RetainRV: { - // If we're compiling for a target which needs a special inline-asm - // marker to do the retainAutoreleasedReturnValue optimization, - // insert it now. - if (!RetainRVMarker) - break; - BasicBlock::iterator BBI = Inst; - BasicBlock *InstParent = Inst->getParent(); - - // Step up to see if the call immediately precedes the RetainRV call. - // If it's an invoke, we have to cross a block boundary. And we have - // to carefully dodge no-op instructions. - do { - if (&*BBI == InstParent->begin()) { - BasicBlock *Pred = InstParent->getSinglePredecessor(); - if (!Pred) - goto decline_rv_optimization; - BBI = Pred->getTerminator(); - break; - } - --BBI; - } while (isNoopInstruction(BBI)); - - if (&*BBI == GetObjCArg(Inst)) { - DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " - "retainAutoreleasedReturnValue optimization.\n"); - Changed = true; - InlineAsm *IA = - InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), - /*isVarArg=*/false), - RetainRVMarker->getString(), - /*Constraints=*/"", /*hasSideEffects=*/true); - CallInst::Create(IA, "", Inst); - } - decline_rv_optimization: - break; - } - case IC_InitWeak: { - // objc_initWeak(p, null) => *p = null - CallInst *CI = cast(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { - Value *Null = - ConstantPointerNull::get(cast(CI->getType())); - Changed = true; - new StoreInst(Null, CI->getArgOperand(0), CI); - - DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" - << " New = " << *Null << "\n"); - - CI->replaceAllUsesWith(Null); - CI->eraseFromParent(); - } - continue; - } - case IC_Release: - ContractRelease(Inst, I); - continue; - case IC_User: - // Be conservative if the function has any alloca instructions. - // Technically we only care about escaping alloca instructions, - // but this is sufficient to handle some interesting cases. - if (isa(Inst)) - TailOkForStoreStrongs = false; - continue; - default: - continue; - } - - DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); - - // Don't use GetObjCArg because we don't want to look through bitcasts - // and such; to do the replacement, the argument must have type i8*. - const Value *Arg = cast(Inst)->getArgOperand(0); - for (;;) { - // If we're compiling bugpointed code, don't get in trouble. - if (!isa(Arg) && !isa(Arg)) - break; - // Look through the uses of the pointer. - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ) { - Use &U = UI.getUse(); - unsigned OperandNo = UI.getOperandNo(); - ++UI; // Increment UI now, because we may unlink its element. - - // If the call's return value dominates a use of the call's argument - // value, rewrite the use to use the return value. We check for - // reachability here because an unreachable call is considered to - // trivially dominate itself, which would lead us to rewriting its - // argument in terms of its return value, which would lead to - // infinite loops in GetObjCArg. 
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { - Changed = true; - Instruction *Replacement = Inst; - Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast(U.getUser())) { - // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = PHI->getIncomingBlock(ValNo); - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - &BB->back()); - // While we're here, rewrite all edges for this PHI, rather - // than just one use at a time, to minimize the number of - // bitcasts we emit. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingBlock(i) == BB) { - // Keep the UI iterator valid. - if (&PHI->getOperandUse( - PHINode::getOperandNumForIncomingValue(i)) == - &UI.getUse()) - ++UI; - PHI->setIncomingValue(i, Replacement); - } - } else { - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - cast(U.getUser())); - U.set(Replacement); - } - } - } - - // If Arg is a no-op casted pointer, strip one level of casts and iterate. - if (const BitCastInst *BI = dyn_cast(Arg)) - Arg = BI->getOperand(0); - else if (isa(Arg) && - cast(Arg)->hasAllZeroIndices()) - Arg = cast(Arg)->getPointerOperand(); - else if (isa(Arg) && - !cast(Arg)->mayBeOverridden()) - Arg = cast(Arg)->getAliasee(); - else - break; - } - } - - // If this function has no escaping allocas or suspicious vararg usage, - // objc_storeStrong calls can be marked with the "tail" keyword. - if (TailOkForStoreStrongs) - for (SmallPtrSet::iterator I = StoreStrongCalls.begin(), - E = StoreStrongCalls.end(); I != E; ++I) - (*I)->setTailCall(); - StoreStrongCalls.clear(); - - return Changed; -} - -/// @} -/// diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp index 3192a6d..a02e429 100644 --- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -21,6 +21,8 @@ #include "ObjCARC.h" +#include "llvm/IR/Intrinsics.h" + using namespace llvm; using namespace llvm::objcarc; @@ -147,3 +149,94 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { // Anything else. return IC_CallOrUser; } + +/// \brief Determine what kind of construct V is. +InstructionClass +llvm::objcarc::GetInstructionClass(const Value *V) { + if (const Instruction *I = dyn_cast(V)) { + // Any instruction other than bitcast and gep with a pointer operand have a + // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer + // to a subsequent use, rather than using it themselves, in this sense. + // As a short cut, several other opcodes are known to have no pointer + // operands of interest. And ret is never followed by a release, so it's + // not interesting to examine. + switch (I->getOpcode()) { + case Instruction::Call: { + const CallInst *CI = cast(I); + // Check for calls to special functions. + if (const Function *F = CI->getCalledFunction()) { + InstructionClass Class = GetFunctionClass(F); + if (Class != IC_CallOrUser) + return Class; + + // None of the intrinsic functions do objc_release. For intrinsics, the + // only question is whether or not they may be users. 
+ switch (F->getIntrinsicID()) { + case Intrinsic::returnaddress: case Intrinsic::frameaddress: + case Intrinsic::stacksave: case Intrinsic::stackrestore: + case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + case Intrinsic::objectsize: case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: case Intrinsic::invariant_end: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. + return IC_None; + default: + break; + } + } + return GetCallSiteClass(CI); + } + case Instruction::Invoke: + return GetCallSiteClass(cast(I)); + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: case Instruction::PHI: + case Instruction::Ret: case Instruction::Br: + case Instruction::Switch: case Instruction::IndirectBr: + case Instruction::Alloca: case Instruction::VAArg: + case Instruction::Add: case Instruction::FAdd: + case Instruction::Sub: case Instruction::FSub: + case Instruction::Mul: case Instruction::FMul: + case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: + case Instruction::SRem: case Instruction::URem: case Instruction::FRem: + case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: + case Instruction::And: case Instruction::Or: case Instruction::Xor: + case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: + case Instruction::IntToPtr: case Instruction::FCmp: + case Instruction::FPTrunc: case Instruction::FPExt: + case Instruction::FPToUI: case Instruction::FPToSI: + case Instruction::UIToFP: case Instruction::SIToFP: + case Instruction::InsertElement: case Instruction::ExtractElement: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + break; + case Instruction::ICmp: + // Comparing a pointer with null, or any other constant, isn't an + // interesting use, because we don't care what the pointer points to, or + // about the values of any other dynamic reference-counted pointers. + if (IsPotentialRetainableObjPtr(I->getOperand(1))) + return IC_User; + break; + default: + // For anything else, check all the operands. + // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. + for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (IsPotentialRetainableObjPtr(*OI)) + return IC_User; + } + } + + // Otherwise, it's totally inert for ARC purposes. + return IC_None; +} diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp new file mode 100644 index 0000000..79a90c6 --- /dev/null +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -0,0 +1,177 @@ +//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine whether two pointer values could share
+/// a provenance source, and are thus potentially related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
+                                       const Value *B) {
+  // If the values are Selects with the same condition, we can do a more precise
+  // check: just check for relations between the values on corresponding arms.
+  if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+    if (A->getCondition() == SB->getCondition())
+      return related(A->getTrueValue(), SB->getTrueValue()) ||
+             related(A->getFalseValue(), SB->getFalseValue());
+
+  // Check both arms of the Select node individually.
+  return related(A->getTrueValue(), B) ||
+         related(A->getFalseValue(), B);
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
+                                    const Value *B) {
+  // If the values are PHIs in the same block, we can do a more precise as well
+  // as efficient check: just check for relations between the values on
+  // corresponding edges.
+  if (const PHINode *PNB = dyn_cast<PHINode>(B))
+    if (PNB->getParent() == A->getParent()) {
+      for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+        if (related(A->getIncomingValue(i),
+                    PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+          return true;
+      return false;
+    }
+
+  // Check each unique source of the PHI node against B.
+  SmallPtrSet<const Value *, 4> UniqueSrc;
+  for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+    const Value *PV1 = A->getIncomingValue(i);
+    if (UniqueSrc.insert(PV1) && related(PV1, B))
+      return true;
+  }
+
+  // All of the arms checked out.
+  return false;
+}
+
+/// Test if the value of P, or any value covered by its provenance, is ever
+/// stored within the function (not counting callees).
+static bool isStoredObjCPointer(const Value *P) {
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Worklist;
+  Worklist.push_back(P);
+  Visited.insert(P);
+  do {
+    P = Worklist.pop_back_val();
+    for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+         UI != UE; ++UI) {
+      const User *Ur = *UI;
+      if (isa<StoreInst>(Ur)) {
+        if (UI.getOperandNo() == 0)
+          // The pointer is stored.
+          return true;
+        // Something else is stored through the pointer.
+        continue;
+      }
+      if (isa<CallInst>(Ur))
+        // The pointer is passed as an argument, ignore this.
+        continue;
+      if (isa<PtrToIntInst>(P))
+        // Assume the worst.
+        return true;
+      if (Visited.insert(Ur))
+        Worklist.push_back(Ur);
+    }
+  } while (!Worklist.empty());
+
+  // Everything checked out.
+  return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A,
+                                      const Value *B) {
+  // Skip past provenance pass-throughs.
+  A = GetUnderlyingObjCPtr(A);
+  B = GetUnderlyingObjCPtr(B);
+
+  // Quick check.
+  if (A == B)
+    return true;
+
+  // Ask regular AliasAnalysis, for a first approximation.
+  switch (AA->alias(A, B)) {
+  case AliasAnalysis::NoAlias:
+    return false;
+  case AliasAnalysis::MustAlias:
+  case AliasAnalysis::PartialAlias:
+    return true;
+  case AliasAnalysis::MayAlias:
+    break;
+  }
+
+  bool AIsIdentified = IsObjCIdentifiedObject(A);
+  bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+  // An ObjC-Identified object can't alias a load if it is never locally stored.
+  if (AIsIdentified) {
+    // Check for an obvious escape.
+    if (isa<LoadInst>(B))
+      return isStoredObjCPointer(A);
+    if (BIsIdentified) {
+      // Check for an obvious escape.
+      if (isa<LoadInst>(A))
+        return isStoredObjCPointer(B);
+      // Both pointers are identified and escapes aren't an evident problem.
+      return false;
+    }
+  } else if (BIsIdentified) {
+    // Check for an obvious escape.
+    if (isa<LoadInst>(A))
+      return isStoredObjCPointer(B);
+  }
+
+  // Special handling for PHI and Select.
+  if (const PHINode *PN = dyn_cast<PHINode>(A))
+    return relatedPHI(PN, B);
+  if (const PHINode *PN = dyn_cast<PHINode>(B))
+    return relatedPHI(PN, A);
+  if (const SelectInst *S = dyn_cast<SelectInst>(A))
+    return relatedSelect(S, B);
+  if (const SelectInst *S = dyn_cast<SelectInst>(B))
+    return relatedSelect(S, A);
+
+  // Conservative.
+  return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A,
+                                 const Value *B) {
+  // Begin by inserting a conservative value into the map. If the insertion
+  // fails, we have the answer already. If it succeeds, leave it there until we
+  // compute the real answer to guard against recursive queries.
+  if (A > B) std::swap(A, B);
+  std::pair<CachedResultsTy::iterator, bool> Pair =
+    CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  bool Result = relatedCheck(A, B);
+  CachedResults[ValuePairTy(A, B)] = Result;
+  return Result;
+}
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
new file mode 100644
index 0000000..d86f08b
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -0,0 +1,79 @@
+//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine whether two pointer values could share
+/// a provenance source, and are thus potentially related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+  class Value;
+  class AliasAnalysis;
+  class PHINode;
+  class SelectInst;
+}
+
+namespace llvm {
+namespace objcarc {
+/// \brief This is similar to BasicAliasAnalysis, and it uses many of the same
+/// techniques, except it uses special ObjC-specific reasoning about pointer
+/// relationships.
+///
+/// In this context ``Provenance'' is defined as the history of an object's
+/// ownership. Thus ``Provenance Analysis'' is defined by using the notion of
+/// an ``independent provenance source'' of a pointer to determine whether or
+/// not two pointers have the same provenance source and thus could
+/// potentially be related.
+class ProvenanceAnalysis {
+  AliasAnalysis *AA;
+
+  typedef std::pair<const Value *, const Value *> ValuePairTy;
+  typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+  CachedResultsTy CachedResults;
+
+  bool relatedCheck(const Value *A, const Value *B);
+  bool relatedSelect(const SelectInst *A, const Value *B);
+  bool relatedPHI(const PHINode *A, const Value *B);
+
+  void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+  ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+
+public:
+  ProvenanceAnalysis() {}
+
+  void setAA(AliasAnalysis *aa) { AA = aa; }
+
+  AliasAnalysis *getAA() const { return AA; }
+
+  bool related(const Value *A, const Value *B);
+
+  void clear() {
+    CachedResults.clear();
+  }
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
-- 
cgit v1.1

From 606c8e36dfdd28fc589356addd3e2cbb89a32e4d Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Tue, 29 Jan 2013 03:20:31 +0000
Subject: Convert getAttributes() to return an AttributeSetNode.

The AttributeSetNode contains all of the attributes. This removes one
(hopefully last) use of the Attribute class as a container of multiple
attributes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173761 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/AttributeImpl.h |  14 +++++
 lib/IR/Attributes.cpp  | 155 +++++++++++++++++++++++++++++--------------------
 2 files changed, 107 insertions(+), 62 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 34754e8..91f5005 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/IR/Attributes.h"
+#include <string>
 
 namespace llvm {
 
@@ -92,6 +93,13 @@ class AttributeSetNode : public FoldingSetNode {
 public:
   static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
 
+  bool hasAttribute(Attribute::AttrKind Kind) const;
+  bool hasAttributes() const { return !AttrList.empty(); }
+
+  unsigned getAlignment() const;
+  unsigned getStackAlignment() const;
+  std::string getAsString() const;
+
   typedef SmallVectorImpl<Attribute>::iterator       iterator;
   typedef SmallVectorImpl<Attribute>::const_iterator const_iterator;
 
@@ -152,6 +160,12 @@ public:
     return AttributeSet::get(Context, AttrNodes[Slot]);
   }
 
+  /// \brief Retrieve the attribute set node for the given "slot" in the
+  /// AttrNode list.
+ AttributeSetNode *getSlotNode(unsigned Slot) const { + return AttrNodes[Slot].second; + } + typedef AttributeSetNode::iterator iterator; typedef AttributeSetNode::const_iterator const_iterator; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8250330..ab29766 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -99,77 +99,78 @@ unsigned Attribute::getStackAlignment() const { } std::string Attribute::getAsString() const { - std::string Result; if (hasAttribute(Attribute::ZExt)) - Result += "zeroext "; + return "zeroext"; if (hasAttribute(Attribute::SExt)) - Result += "signext "; + return "signext"; if (hasAttribute(Attribute::NoReturn)) - Result += "noreturn "; + return "noreturn"; if (hasAttribute(Attribute::NoUnwind)) - Result += "nounwind "; + return "nounwind"; if (hasAttribute(Attribute::UWTable)) - Result += "uwtable "; + return "uwtable"; if (hasAttribute(Attribute::ReturnsTwice)) - Result += "returns_twice "; + return "returns_twice"; if (hasAttribute(Attribute::InReg)) - Result += "inreg "; + return "inreg"; if (hasAttribute(Attribute::NoAlias)) - Result += "noalias "; + return "noalias"; if (hasAttribute(Attribute::NoCapture)) - Result += "nocapture "; + return "nocapture"; if (hasAttribute(Attribute::StructRet)) - Result += "sret "; + return "sret"; if (hasAttribute(Attribute::ByVal)) - Result += "byval "; + return "byval"; if (hasAttribute(Attribute::Nest)) - Result += "nest "; + return "nest"; if (hasAttribute(Attribute::ReadNone)) - Result += "readnone "; + return "readnone"; if (hasAttribute(Attribute::ReadOnly)) - Result += "readonly "; + return "readonly"; if (hasAttribute(Attribute::OptimizeForSize)) - Result += "optsize "; + return "optsize"; if (hasAttribute(Attribute::NoInline)) - Result += "noinline "; + return "noinline"; if (hasAttribute(Attribute::InlineHint)) - Result += "inlinehint "; + return "inlinehint"; if (hasAttribute(Attribute::AlwaysInline)) - Result += "alwaysinline "; + return "alwaysinline"; if (hasAttribute(Attribute::StackProtect)) - Result += "ssp "; + return "ssp"; if (hasAttribute(Attribute::StackProtectReq)) - Result += "sspreq "; + return "sspreq"; if (hasAttribute(Attribute::StackProtectStrong)) - Result += "sspstrong "; + return "sspstrong"; if (hasAttribute(Attribute::NoRedZone)) - Result += "noredzone "; + return "noredzone"; if (hasAttribute(Attribute::NoImplicitFloat)) - Result += "noimplicitfloat "; + return "noimplicitfloat"; if (hasAttribute(Attribute::Naked)) - Result += "naked "; + return "naked"; if (hasAttribute(Attribute::NonLazyBind)) - Result += "nonlazybind "; + return "nonlazybind"; if (hasAttribute(Attribute::AddressSafety)) - Result += "address_safety "; + return "address_safety"; if (hasAttribute(Attribute::MinSize)) - Result += "minsize "; + return "minsize"; if (hasAttribute(Attribute::StackAlignment)) { + std::string Result; Result += "alignstack("; Result += utostr(getStackAlignment()); - Result += ") "; + Result += ")"; + return Result; } if (hasAttribute(Attribute::Alignment)) { + std::string Result; Result += "align "; Result += utostr(getAlignment()); - Result += " "; + Result += ""; + return Result; } if (hasAttribute(Attribute::NoDuplicate)) - Result += "noduplicate "; - // Trim the trailing space. 
- assert(!Result.empty() && "Unknown attribute!"); - Result.erase(Result.end()-1); - return Result; + return "noduplicate"; + + llvm_unreachable("Unknown attribute"); } bool Attribute::operator==(AttrKind K) const { @@ -354,6 +355,40 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, return PA; } +bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) + if (I->hasAttribute(Kind)) + return true; + return false; +} + +unsigned AttributeSetNode::getAlignment() const { + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) + if (I->hasAttribute(Attribute::Alignment)) + return I->getAlignment(); + return 0; +} + +unsigned AttributeSetNode::getStackAlignment() const { + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) + if (I->hasAttribute(Attribute::StackAlignment)) + return I->getStackAlignment(); + return 0; +} + +std::string AttributeSetNode::getAsString() const { + std::string Str = ""; + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) { + if (I != AttrList.begin()) Str += " "; + Str += I->getAsString(); + } + return Str; +} + //===----------------------------------------------------------------------===// // AttributeSetImpl Definition //===----------------------------------------------------------------------===// @@ -597,7 +632,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { return pImpl && hasAttributes(Idx) ? AttributeSet::get(pImpl->getContext(), - ArrayRef >( + ArrayRef >( std::make_pair(Idx, getAttributes(Idx)))) : AttributeSet(); } @@ -605,7 +640,7 @@ AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { AttributeSet AttributeSet::getRetAttributes() const { return pImpl && hasAttributes(ReturnIndex) ? AttributeSet::get(pImpl->getContext(), - ArrayRef >( + ArrayRef >( std::make_pair(ReturnIndex, getAttributes(ReturnIndex)))) : AttributeSet(); @@ -614,18 +649,20 @@ AttributeSet AttributeSet::getRetAttributes() const { AttributeSet AttributeSet::getFnAttributes() const { return pImpl && hasAttributes(FunctionIndex) ? AttributeSet::get(pImpl->getContext(), - ArrayRef >( + ArrayRef >( std::make_pair(FunctionIndex, getAttributes(FunctionIndex)))) : AttributeSet(); } bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ - return getAttributes(Index).hasAttribute(Kind); + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->hasAttribute(Kind) : false; } bool AttributeSet::hasAttributes(unsigned Index) const { - return getAttributes(Index).hasAttributes(); + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->hasAttributes() : false; } /// \brief Return true if the specified attribute is set for at least one @@ -642,36 +679,31 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { return false; } -unsigned AttributeSet::getParamAlignment(unsigned Idx) const { - return getAttributes(Idx).getAlignment(); +unsigned AttributeSet::getParamAlignment(unsigned Index) const { + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->getAlignment() : 0; } unsigned AttributeSet::getStackAlignment(unsigned Index) const { - return getAttributes(Index).getStackAlignment(); + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? 
ASN->getStackAlignment() : 0; } std::string AttributeSet::getAsString(unsigned Index) const { - return getAttributes(Index).getAsString(); + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->getAsString() : std::string(""); } /// \brief The attributes for the specified index are returned. -/// -/// FIXME: This shouldn't return 'Attribute'. -Attribute AttributeSet::getAttributes(unsigned Idx) const { - if (pImpl == 0) return Attribute(); - - // Loop through to find the attribute we want. - for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Idx) continue; +AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const { + if (!pImpl) return 0; - AttrBuilder B; - for (AttributeSetImpl::const_iterator II = pImpl->begin(I), - IE = pImpl->end(I); II != IE; ++II) - B.addAttributes(*II); - return Attribute::get(pImpl->getContext(), B); - } + // Loop through to find the attribute node we want. + for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) + if (pImpl->getSlotIndex(I) == Idx) + return pImpl->getSlotNode(I); - return Attribute(); + return 0; } //===----------------------------------------------------------------------===// @@ -948,9 +980,8 @@ void AttributeFuncs::decodeLLVMAttributesForBitcode(LLVMContext &C, assert((!Alignment || isPowerOf2_32(Alignment)) && "Alignment must be a power of two."); - B.addRawValue(EncodedAttrs & 0xffff); if (Alignment) B.addAlignmentAttr(Alignment); - B.addRawValue((EncodedAttrs & (0xffffULL << 32)) >> 11); + B.addRawValue(((EncodedAttrs & (0xffffULL << 32)) >> 11) | + (EncodedAttrs & 0xffff)); } - -- cgit v1.1 From b0f82e7eab4b255ce925e471aa796c13257ac7c6 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 04:05:17 +0000 Subject: Removed InstCombine/Targets as library dependencies for libObjCARCOpts since they are unnecessary. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173763 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/LLVMBuild.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/LLVMBuild.txt b/lib/Transforms/ObjCARC/LLVMBuild.txt index 61eced0..ecff0bb 100644 --- a/lib/Transforms/ObjCARC/LLVMBuild.txt +++ b/lib/Transforms/ObjCARC/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = ObjCARC parent = Transforms library_name = ObjCARCOpts -required_libraries = Analysis Core InstCombine Support Target TransformUtils +required_libraries = Analysis Core Support TransformUtils -- cgit v1.1 From 4a0480ad4e1f3f414e324f08c0e61a361452c55f Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 04:09:24 +0000 Subject: Added two missing headers from ObjCARCAliasAnalysis.h. This was missed since whenever I was including ObjCARCAliasAnalysis.h, I was including ObjCARC.h before it which included these includes (resulting in no compilation breakage). 
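
As an aside, this is the classic failure mode of a non-self-contained header; a
reduced sketch (hypothetical names, not the actual ObjCARC headers):

    // gadget.h -- names Widget but never declares it itself.
    #ifndef GADGET_H
    #define GADGET_H
    struct Gadget { Widget *W; };  // compiles only if Widget is already visible
    #endif

    // ok.cpp -- builds by include-order luck: widget.h happens to come first.
    #include "widget.h"   // declares Widget
    #include "gadget.h"

    // broken.cpp -- fails: gadget.h is included first, Widget is unknown.
    #include "gadget.h"

The fix is to make the header pull in (or forward-declare) its own
dependencies, which is what adding AliasAnalysis.h and Pass.h below does.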
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173764 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h index d223b38..7abe995 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -23,6 +23,9 @@ #ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H #define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" + namespace llvm { namespace objcarc { -- cgit v1.1 From 7a7102d17f979918042bc040e27288d64a6bea5f Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 04:20:52 +0000 Subject: Sorted includes using utils/sort_includes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 3 +-- lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 1 - lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 1 - lib/Transforms/ObjCARC/ObjCARCContract.cpp | 3 +-- lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 1 - lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 9 +++------ lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 1 - lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 2 +- 8 files changed, 6 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 5640009..5aada9c 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -22,9 +22,8 @@ #define DEBUG_TYPE "objc-arc-dependency" #include "ObjCARC.h" -#include "ProvenanceAnalysis.h" #include "DependencyAnalysis.h" - +#include "ProvenanceAnalysis.h" #include "llvm/Support/CFG.h" using namespace llvm; diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp index b8c51c2..fe6406a 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -30,7 +30,6 @@ #define DEBUG_TYPE "objc-arc-ap-elim" #include "ObjCARC.h" - #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp index 5d09e5a..46b2de7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -23,7 +23,6 @@ #define DEBUG_TYPE "objc-arc-aa" #include "ObjCARC.h" #include "ObjCARCAliasAnalysis.h" - #include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/PassAnalysisSupport.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 704ac92..9522bd4 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -25,9 +25,8 @@ #define DEBUG_TYPE "objc-arc-contract" #include "ObjCARC.h" -#include "ProvenanceAnalysis.h" #include "DependencyAnalysis.h" - +#include "ProvenanceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/IR/InlineAsm.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp index 633f988..39bf8f3 100644 --- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp @@ -26,7 +26,6 @@ #define DEBUG_TYPE "objc-arc-expand" #include 
"ObjCARC.h" - #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 370c7f4..9ccc6ee 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -30,13 +30,14 @@ #define DEBUG_TYPE "objc-arc-opts" #include "ObjCARC.h" +#include "DependencyAnalysis.h" #include "ObjCARCAliasAnalysis.h" #include "ProvenanceAnalysis.h" -#include "DependencyAnalysis.h" - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CFG.h" using namespace llvm; @@ -291,10 +292,6 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { // TODO: Delete release+retain pairs (rare). -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/LLVMContext.h" - STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp index a02e429..862f9cc 100644 --- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -20,7 +20,6 @@ //===----------------------------------------------------------------------===// #include "ObjCARC.h" - #include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 79a90c6..ff38c9d 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -24,8 +24,8 @@ #include "ObjCARC.h" #include "ProvenanceAnalysis.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" using namespace llvm; using namespace llvm::objcarc; -- cgit v1.1 From 9ab758b9bc2fe51af6dabbdeb30f4a2e600bdcd0 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 04:51:59 +0000 Subject: Juggled Debug.h from ObjCARC.h to only the including cpp files that actually have DEBUG statements. Also changed raw_ostream in said header to be a forward declaration (removing an include). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173769 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARC.h | 6 ++++-- lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 1 + lib/Transforms/ObjCARC/ObjCARCContract.cpp | 1 + lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index 32ca30e..2b90496 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -30,13 +30,15 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/Local.h" namespace llvm { +class raw_ostream; +} + +namespace llvm { namespace objcarc { /// \brief A handy option to enable/disable all ARC Optimizations. 
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index fe6406a..36cf4e5 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -32,6 +32,7 @@
 #include "ObjCARC.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 using namespace llvm::objcarc;
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 9522bd4..a9140fb 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 using namespace llvm::objcarc;
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9ccc6ee..47c158c 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -39,6 +39,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 using namespace llvm::objcarc;
-- 
cgit v1.1

From 67866101a9715d5b56fbae19b739fdbbf2544754 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 29 Jan 2013 04:53:55 +0000
Subject: Added missing dashes from header declaration comment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173770 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/LLVMBuild.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/LLVMBuild.txt b/lib/Transforms/ObjCARC/LLVMBuild.txt
index ecff0bb..90a2338 100644
--- a/lib/Transforms/ObjCARC/LLVMBuild.txt
+++ b/lib/Transforms/ObjCARC/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Transforms/ObjCARC/LLVMBuild.txt --- ---------*- Conf -*--===;
+;===- ./lib/Transforms/ObjCARC/LLVMBuild.txt -------------------*- Conf -*--===;
 ;
 ; The LLVM Compiler Infrastructure
 ;
-- 
cgit v1.1

From 3dcfdab267f157aeb0e6aed896f7acdeb11481ba Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 29 Jan 2013 04:58:30 +0000
Subject: Fixed some whitespace/80+ violations. Also added a space after a
 namespace declaration.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173772 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/ObjCARC.cpp          | 2 +-
 lib/Transforms/ObjCARC/ObjCARC.h            | 6 ++++--
 lib/Transforms/ObjCARC/ObjCARCContract.cpp  | 3 ---
 lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 1 +
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index b86dff1..f798063 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -1,4 +1,4 @@
-//===-- ObjCARC.cpp --------------------------------------------------------===//
+//===-- ObjCARC.cpp -------------------------------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index 2b90496..e062b66 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -293,7 +293,8 @@ static inline void EraseInstruction(Instruction *CI) {
 /// \brief Test whether the given value is possibly a retainable object pointer.
 static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
-  // Pointers to static or stack storage are not valid retainable object pointers.
+  // Pointers to static or stack storage are not valid retainable object
+  // pointers.
   if (isa<Constant>(Op) || isa<AllocaInst>(Op))
     return false;
   // Special arguments cannot be a valid retainable object pointer.
@@ -310,7 +311,8 @@ static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
   PointerType *Ty = dyn_cast<PointerType>(Op->getType());
   if (!Ty)
     return false;
-  // Conservatively assume anything else is a potential retainable object pointer.
+  // Conservatively assume anything else is a potential retainable object
+  // pointer.
   return true;
 }
 
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index a9140fb..7d1768a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -532,6 +532,3 @@ bool ObjCARCContract::runOnFunction(Function &F) {
 
   return Changed;
 }
-
-/// @}
-///
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index d86f08b..ec449fd 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -36,6 +36,7 @@ namespace llvm {
 
 namespace llvm {
 namespace objcarc {
+
 /// \brief This is similar to BasicAliasAnalysis, and it uses many of the same
 /// techniques, except it uses special ObjC-specific reasoning about pointer
 /// relationships.
-- 
cgit v1.1

From 7fd324a31fbfd237f43d38d3a780a19fbf909ba3 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 29 Jan 2013 05:05:17 +0000
Subject: Fixed header comment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173773 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 36cf4e5..f693c3f 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -1,4 +1,4 @@
-//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
+//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -7,17 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 /// \file
+///
 /// This file defines ObjC ARC optimizations. ARC stands for Automatic
 /// Reference Counting and is a system for managing reference counts for objects
 /// in Objective C.
 ///
-/// The optimizations performed include elimination of redundant, partially
-/// redundant, and inconsequential reference count operations, elimination of
-/// redundant weak pointer operations, pattern-matching and replacement of
-/// low-level operations into higher-level operations, and numerous minor
-/// simplifications.
-///
-/// This file also defines a simple ARC-aware AliasAnalysis.
+/// This specific file implements optimizations which remove extraneous
+/// autorelease pools.
 ///
 /// WARNING: This file knows about certain library functions. It recognizes them
 /// by name, and hardwires knowledge of their semantics.
-- 
cgit v1.1

From 9d46ef67baf92022f82a49bb2eb8b737b2bdbe95 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 29 Jan 2013 05:07:18 +0000
Subject: Fixed 2 more header comments...
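
For reference, the layout these fixes restore is the standard LLVM file banner
(sketching the common convention, not a normative spec): the first line names
the file itself plus a one-line description, padded with dashes so the closing
delimiter ends in column 80:

    //===- FileName.cpp - Short description of this file ---------------------===//

Both fixes below are cases where the banner named the wrong file: ObjCARCOpts.cpp
instead of ObjCARCContract.cpp, and a stale .h suffix on ObjCARCUtil.cpp.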
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173774 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCContract.cpp | 2 +- lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 7d1768a..ca02b00 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -1,4 +1,4 @@ -//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// +//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp index 862f9cc..a841c64 100644 --- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -1,4 +1,4 @@ -//===- ObjCARCUtil.h - ObjC ARC Optimization ----------*- mode: c++ -*-----===// +//===- ObjCARCUtil.cpp - ObjC ARC Optimization --------*- mode: c++ -*-----===// // // The LLVM Compiler Infrastructure // -- cgit v1.1 From 4c6a2ba4e01ef75837bb39808a3935bd8687ce67 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 29 Jan 2013 06:26:35 +0000 Subject: MIsched: cleanup code. Use isBoundaryNode(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173775 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index adf9a57..513d8a9 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -587,17 +587,19 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl &TopRoots, for (std::vector::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); // Order predecessors so DFSResult follows the critical path. SU->biasCriticalPath(); // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft && SU != &EntrySU) + if (!I->NumPredsLeft) TopRoots.push_back(SU); // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft && SU != &ExitSU) + if (!I->NumSuccsLeft) BotRoots.push_back(SU); } + ExitSU.biasCriticalPath(); } /// Identify DAG roots and setup scheduler queues. -- cgit v1.1 From e6482fabd20a2a5b4f81aff55812782f3b617514 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Jan 2013 07:54:31 +0000 Subject: Merge SSE and AVX shuffle instructions in the comment printer. 
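
The shape of the cleanup, reduced to one pair of opcodes (FOOrr/VFOOrr are
placeholders, not real instructions): the SSE body differed from the AVX body
only in reading the first source from operand 0 instead of operand 1, and
since the two-address SSE forms tie the destination (operand 0) to the first
source (operand 1), the SSE cases can simply share the AVX body.

    // Before: duplicated bodies.
    case X86::FOOrr:
      Src2Name = getRegName(MI->getOperand(2).getReg());
      Src1Name = getRegName(MI->getOperand(0).getReg());
      break;
    case X86::VFOOrr:
      Src2Name = getRegName(MI->getOperand(2).getReg());
      Src1Name = getRegName(MI->getOperand(1).getReg());
      break;

    // After: one body. Operand 1 is correct for both, because in the SSE
    // form operand 0 (the destination) is tied to operand 1 (the source).
    case X86::FOOrr:
    case X86::VFOOrr:
      Src2Name = getRegName(MI->getOperand(2).getReg());
      Src1Name = getRegName(MI->getOperand(1).getReg());
      break;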
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/InstPrinter/X86InstComments.cpp | 112 ++++--------------------- 1 file changed, 14 insertions(+), 98 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 865930a..0f6eeb1 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -34,10 +34,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, switch (MI->getOpcode()) { case X86::INSERTPSrr: - Src1Name = getRegName(MI->getOperand(0).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); - DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); - break; case X86::VINSERTPSrr: DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -46,10 +42,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::MOVLHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVLHPSMask(2, ShuffleMask); - break; case X86::VMOVLHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -58,10 +50,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::MOVHLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVHLPSMask(2, ShuffleMask); - break; case X86::VMOVHLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -154,15 +142,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::PUNPCKHBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); - break; case X86::VPUNPCKHBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHBWrm: case X86::VPUNPCKHBWrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -177,15 +160,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKHWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); - break; case X86::VPUNPCKHWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHWDrm: case X86::VPUNPCKHWDrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -200,15 +178,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKHDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); - break; case X86::VPUNPCKHDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ case X86::PUNPCKHDQrm: case X86::VPUNPCKHDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -223,15 +196,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKHQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); - break; case X86::VPUNPCKHQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHQDQrm: case X86::VPUNPCKHQDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -247,15 +215,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::PUNPCKLBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); - break; case X86::VPUNPCKLBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLBWrm: case X86::VPUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -270,15 +233,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKLWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); - break; case X86::VPUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLWDrm: case X86::VPUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -293,15 +251,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKLDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); - break; case X86::VPUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLDQrm: case X86::VPUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -316,15 +269,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKLQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); - break; case X86::VPUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLQDQrm: case X86::VPUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -340,16 +288,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::SHUFPDrmi: - DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VSHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::SHUFPDrmi: case X86::VSHUFPDrmi: DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -367,16 +309,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPSrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPSrmi: - DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VSHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::SHUFPSrmi: case X86::VSHUFPSrmi: DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -394,15 +330,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::UNPCKLPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPDrm: - DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKLPDrm: case X86::VUNPCKLPDrm: DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -417,15 +348,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPSrm: - DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKLPSrm: case X86::VUNPCKLPSrm: DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -440,15 +366,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPDrm: - DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKHPDrm: case X86::VUNPCKHPDrm: DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -463,15 +384,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPSrm: - DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ case X86::UNPCKHPSrm: case X86::VUNPCKHPSrm: DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); -- cgit v1.1 From 0adfdedacbb87df8cc8b8311365a15fae004977e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 29 Jan 2013 09:06:13 +0000 Subject: Fix 64-bit atomic operations in Thumb mode. The ARM and Thumb variants of LDREXD and STREXD have different constraints and take different operands. Previously the code expanding atomic operations didn't take this into account and asserted in Thumb mode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173780 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 120 ++++++++++++++----------------------- 1 file changed, 46 insertions(+), 74 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3d283a5..6beb1ab 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5969,9 +5969,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } - unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; - unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6009,42 +6006,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // cmp storesuccess, #0 // bne- loopMBB // fallthrough --> exitMBB - // - // Note that the registers are explicitly specified because there is not any - // way to force the register allocator to allocate a register pair. - // - // FIXME: The hardcoded registers are not necessary for Thumb2, but we - // need to properly enforce the restriction that the two output registers - // for ldrexd must be different. BB = loopMBB; + // Load - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned GPRPair2; - if (IsMinMax) { - //We need an extra double register for doing min/max. - unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(undef) - .addReg(vallo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2) - .addReg(r1) - .addReg(valhi) - .addImm(ARM::gsub_1); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); } - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) 
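
At the ISA level the mismatch is roughly this (simplified; see the ARM ARM for
the exact rules): the ARM encodings operate on an even/odd register pair,
while the Thumb-2 encodings take two independently chosen registers.

    // ARM mode -- one GPRPair operand; Rt must be even, Rt2 is Rt+1:
    //     ldrexd  r2, r3, [r0]
    //     strexd  r1, r2, r3, [r0]
    // Thumb-2 mode -- two independent GPR operands:
    //     ldrexd  r2, r5, [r0]
    //     strexd  r1, r2, r5, [r0]
    //
    // Hence the MI builders cannot be shared (DestLo/DestHi/Ptr/PairReg are
    // illustrative names):
    BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
        .addReg(DestLo, RegState::Define)    // two separate 32-bit defs
        .addReg(DestHi, RegState::Define)
        .addReg(Ptr);
    BuildMI(BB, dl, TII->get(ARM::LDREXD))
        .addReg(PairReg, RegState::Define)   // one GPRPair def
        .addReg(Ptr);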
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - + unsigned StoreLo, StoreHi; if (IsCmpxchg) { // Add early exit for (unsigned i = 0; i < 2; i++) { @@ -6060,19 +6041,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, } // Copy to physregs for strexd - unsigned setlo = MI->getOperand(5).getReg(); - unsigned sethi = MI->getOperand(6).getReg(); - unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(undef) - .addReg(setlo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(sethi) - .addImm(ARM::gsub_1); + StoreLo = MI->getOperand(5).getReg(); + StoreHi = MI->getOperand(6).getReg(); } else if (Op1) { // Perform binary operation unsigned tmpRegLo = MRI.createVirtualRegister(TRC); @@ -6084,32 +6054,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addReg(desthi).addReg(valhi)) .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(tmpRegLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(tmpRegHi) - .addImm(ARM::gsub_1); + StoreLo = tmpRegLo; + StoreHi = tmpRegHi; } else { // Copy to physregs for strexd - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(vallo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(valhi) - .addImm(ARM::gsub_1); + StoreLo = vallo; + StoreHi = valhi; } - unsigned GPRPairStore = GPRPair1; if (IsMinMax) { // Compare and branch to exit block. BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -6117,12 +6068,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->addSuccessor(exitMBB); BB->addSuccessor(contBB); BB = contBB; - GPRPairStore = GPRPair2; + StoreLo = vallo; + StoreHi = valhi; } // Store - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(GPRPairStore).addReg(ptr)); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); + } else { + // Marshal a pair... 
+ unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(UndefPair) + .addReg(StoreLo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) + .addReg(r1) + .addReg(StoreHi) + .addImm(ARM::gsub_1); + + // ...and store it + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + .addReg(StorePair).addReg(ptr)); + } // Cmp+jump AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(storesuccess).addImm(0)); -- cgit v1.1 From 09840daeefc1aa8760c535ded6a37eb4f8cd4eaa Mon Sep 17 00:00:00 2001 From: Timur Iskhodzhanov Date: Tue, 29 Jan 2013 09:09:27 +0000 Subject: Hopefully fix the Windows build failure introduced in r173769 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173781 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 1 + lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 1 + 2 files changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp index f693c3f..00d9864 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::objcarc; diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 47c158c..e968c8b 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::objcarc; -- cgit v1.1 From 7c1ac767691b2cb5d3367e667e51714f34eb675b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Jan 2013 14:05:57 +0000 Subject: Fix typo in X86BaseInfo.h that I introduced in r157818. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173798 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 7ea1961..9e68388 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -104,7 +104,7 @@ namespace X86II { /// MO_TLSLD - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the TLS index for the module that - /// contains the symbol. When this index is passed to a call to to + /// contains the symbol. When this index is passed to a call to /// __tls_get_addr, the function will return the base address of the TLS /// block for the symbol. Used in the x86-64 local dynamic TLS access model. /// @@ -114,7 +114,7 @@ namespace X86II { /// MO_TLSLDM - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the TLS index for the module that - /// contains the symbol. When this index is passed to a call to to + /// contains the symbol. 
When this index is passed to a call to /// ___tls_get_addr, the function will return the base address of the TLS /// block for the symbol. Used in the IA32 local dynamic TLS access model. /// -- cgit v1.1 From 3ba51cefb75364a17e3a23c54c216035c33e67a6 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 29 Jan 2013 15:18:16 +0000 Subject: AttributeSet::get(): Fix a valgrind error. It doesn't affect actual behavior, though. Don't touch I->first on the end iterator, I == E! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173804 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index ab29766..2cf7621 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -455,7 +455,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, E = Attrs.end(); I != E; ) { unsigned Index = I->first; SmallVector AttrVec; - while (I->first == Index && I != E) { + while (I != E && I->first == Index) { AttrVec.push_back(I->second); ++I; } -- cgit v1.1 From 86651e4db5efe8530d9d647a83c286d81d628086 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 29 Jan 2013 16:31:56 +0000 Subject: R600/AMDILPeepholeOptimizer.cpp: Tweak std::make_pair to satisfy C++11. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173807 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILPeepholeOptimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp index a5f7ee5..3a28038 100644 --- a/lib/Target/R600/AMDILPeepholeOptimizer.cpp +++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp @@ -366,7 +366,7 @@ AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) { std::string buffer(F->getName().str() + "_noret"); F = dyn_cast( F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); - atomicFuncs.push_back(std::make_pair (CI, F)); + atomicFuncs.push_back(std::make_pair(CI, F)); } if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) -- cgit v1.1 From 8c3a411cd6a193672438a80fa37fbff690833a0c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 29 Jan 2013 17:31:33 +0000 Subject: LoopVectorize: Clean up ValueMap a bit and avoid double lookups. No intended functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173809 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 339f40b..16eb21e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -223,31 +223,34 @@ private: ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {} /// \return True if 'Key' is saved in the Value Map. - bool has(Value *Key) { return MapStoreage.count(Key); } + bool has(Value *Key) const { return MapStorage.count(Key); } /// Initializes a new entry in the map. Sets all of the vector parts to the /// save value in 'Val'. /// \return A reference to a vector with splat values. 
VectorParts &splat(Value *Key, Value *Val) { - MapStoreage[Key].clear(); - MapStoreage[Key].append(UF, Val); - return MapStoreage[Key]; + VectorParts &Entry = MapStorage[Key]; + Entry.assign(UF, Val); + return Entry; } ///\return A reference to the value that is stored at 'Key'. VectorParts &get(Value *Key) { - if (!has(Key)) - MapStoreage[Key].resize(UF); - return MapStoreage[Key]; + VectorParts &Entry = MapStorage[Key]; + if (Entry.empty()) + Entry.resize(UF); + assert(Entry.size() == UF); + return Entry; } + private: /// The unroll factor. Each entry in the map stores this number of vector /// elements. unsigned UF; /// Map storage. We use std::map and not DenseMap because insertions to a /// dense map invalidates its iterators. - std::map MapStoreage; + std::map MapStorage; }; /// The original loop. @@ -824,8 +827,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) { // If this scalar is unknown, assume that it is a constant or that it is // loop invariant. Broadcast V and save the value for future uses. Value *B = getBroadcastInstrs(V); - WidenMap.splat(V, B); - return WidenMap.get(V); + return WidenMap.splat(V, B); } Value *InnerLoopVectorizer::reverseVector(Value *Vec) { -- cgit v1.1 From f1af1feeee0f0ec797410762c006211f9c1e2a0f Mon Sep 17 00:00:00 2001 From: Edwin Vane Date: Tue, 29 Jan 2013 17:42:24 +0000 Subject: Fixing warnings revealed by gcc release build Fixed set-but-not-used warnings. Reviewer: gribozavr git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173810 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackColoring.cpp | 5 ++--- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 1 + lib/Transforms/Scalar/SROA.cpp | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index c173293..bd0d809 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -202,9 +202,8 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { void StackColoring::dump() { for (df_iterator FI = df_begin(MF), FE = df_end(MF); FI != FE; ++FI) { - unsigned Num = BasicBlocks[*FI]; - DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n"); - Num = 0; + DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n"); DEBUG(dbgs()<<"BEGIN : {"); for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) DEBUG(dbgs()< { if (MS.TrackOrigins) IRB.CreateStore(getOrigin(A), getOriginPtrForArgument(A, IRB, ArgOffset)); + (void)Store; assert(Size != 0 && Store != 0); DEBUG(dbgs() << " Param:" << *Store << "\n"); ArgOffset += DataLayout::RoundUpAlignment(Size, 8); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 4204171..173f25f 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2971,6 +2971,7 @@ private: else New = IRB.CreateLifetimeEnd(Ptr, Size); + (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return true; } -- cgit v1.1 From 9b5a14d59d827166d1fea5368060c7462d8f1db1 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 29 Jan 2013 18:08:22 +0000 Subject: Remove dead code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173812 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ca606da..0a53a3e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12067,17 +12067,7 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { /*doesNotRet=*/false, /*isReturnValueUsed*/true, Callee, Args, DAG, dl); std::pair CallResult = LowerCallTo(CLI); -#if 1 return CallResult.first; -#else - SDValue RetSin = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, - CallResult.first, DAG.getIntPtrConstant(0)); - SDValue RetCos = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, ArgVT, - CallResult.first, DAG.getIntPtrConstant(1)); - - SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); - return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, RetSin, RetCos); -#endif } /// LowerOperation - Provide custom lowering hooks for some operations. -- cgit v1.1 From 30c3bbe007854506202e112861009e8ca77ad6d4 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 29 Jan 2013 18:18:50 +0000 Subject: Add constant extender support for MInst type instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173813 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.td | 60 ++++++----- lib/Target/Hexagon/HexagonInstrInfoV4.td | 166 +++++++++++++++++++++---------- 2 files changed, 151 insertions(+), 75 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 8b183b9..7fd28c8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -1195,57 +1195,65 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { //===----------------------------------------------------------------------===// // Multiply and use lower result. // Rd=+mpyi(Rs,#u8) -def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in +def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2), "$dst =+ mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - u8ImmPred:$src2))]>; + u8ExtPred:$src2))]>; // Rd=-mpyi(Rs,#u8) -def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2), +def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), "$dst =- mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - n8ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1), + u8ImmPred:$src2)))]>; // Rd=mpyi(Rs,#m9) // s9 is NOT the same as m9 - but it works.. so far. // Assembler maps to either Rd=+mpyi(Rs,#u8 or Rd=-mpyi(Rs,#u8) // depending on the value of m9. See Arch Spec. 
-def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, +CextOpcode = "MPYI", InputType = "imm" in +def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), "$dst = mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s9ImmPred:$src2))]>; + s9ExtPred:$src2))]>, ImmRegRel; // Rd=mpyi(Rs,Rt) +let CextOpcode = "MPYI", InputType = "reg" in def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = mpyi($src1, $src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; + (i32 IntRegs:$src2)))]>, ImmRegRel; // Rx+=mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8, +CextOpcode = "MPYI_acc", InputType = "imm" in def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), "$dst += mpyi($src2, #$src3)", [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3), + (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; // Rx+=mpyi(Rs,Rt) +let CextOpcode = "MPYI_acc", InputType = "reg" in def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst += mpyi($src2, $src3)", [(set (i32 IntRegs:$dst), (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; // Rx-=mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), "$dst -= mpyi($src2, #$src3)", [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u8ImmPred:$src3)))], + u8ExtPred:$src3)))], "$src1 = $dst">; // Multiply and use upper result. 
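(For orientation; an illustrative sketch, not part of the patch. EXAMPLE_ri and its field values are hypothetical.) The conversions in this patch all follow one shape: an immediate operand type such as s9Imm becomes the extendable s9Ext, its matching predicate becomes s9ExtPred, and the def gains the extender metadata. opExtendable names the operand index (counting outs first) that may be replaced by a constant-extender word once the literal exceeds opExtentBits, while CextOpcode and InputType tie the immediate form to its register twin through ImmRegRel:

let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
    CextOpcode = "EXAMPLE", InputType = "imm" in
def EXAMPLE_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
                 "$dst = example($src1, #$src2)", []>, ImmRegRel;
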
@@ -1314,7 +1322,7 @@ def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, // Rxx-=mpyu(Rs,Rt) def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst += mpyu($src2, $src3)", + "$dst -= mpyu($src2, $src3)", [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1), (mul (i64 (anyext (i32 IntRegs:$src2))), @@ -1322,37 +1330,43 @@ def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst), "$src1 = $dst">; +let InputType = "reg", CextOpcode = "ADD_acc" in def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst += add($src2, $src3)", [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +InputType = "imm", CextOpcode = "ADD_acc" in def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, s8Imm:$src3), + IntRegs:$src2, s8Ext:$src3), "$dst += add($src2, #$src3)", [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), - s8ImmPred:$src3), + s8_16ExtPred:$src3), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; +let CextOpcode = "SUB_acc", InputType = "reg" in def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst -= add($src2, $src3)", [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +CextOpcode = "SUB_acc", InputType = "imm" in def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, s8Imm:$src3), + IntRegs:$src2, s8Ext:$src3), "$dst -= add($src2, #$src3)", [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), - s8ImmPred:$src3)))], - "$src1 = $dst">; + s8_16ExtPred:$src3)))], + "$src1 = $dst">, ImmRegRel; //===----------------------------------------------------------------------===// // MTYPE/MPYH - diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 372de9a..1afddc7 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3061,31 +3061,37 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { // Add and accumulate. 
// Rd=add(Rs,add(Ru,#s6)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3), "$dst = add($src1, add($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), - s6ImmPred:$src3)))]>, + s6_16ExtPred:$src3)))]>, Requires<[HasV4T]>; // Rd=add(Rs,sub(#s6,Ru)) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), "$dst = add($src1, sub(#$src2, $src3))", [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2, + (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, (i32 IntRegs:$src3))))]>, Requires<[HasV4T]>; // Generates the same instruction as ADDr_SUBri_V4 but matches different // pattern. // Rd=add(Rs,sub(#s6,Ru)) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), "$dst = add($src1, sub(#$src2, $src3))", [(set (i32 IntRegs:$dst), - (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2), + (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), (i32 IntRegs:$src3)))]>, Requires<[HasV4T]>; @@ -3099,6 +3105,7 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), // Logical doublewords. // Rdd=and(Rtt,~Rss) +let validSubTargets = HasV4SubT in def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = and($src1, ~$src2)", @@ -3107,6 +3114,7 @@ def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // Rdd=or(Rtt,~Rss) +let validSubTargets = HasV4SubT in def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = or($src1, ~$src2)", @@ -3117,6 +3125,7 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), // Logical-logical doublewords. // Rxx^=xor(Rss,Rtt) +let validSubTargets = HasV4SubT in def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), "$dst ^= xor($src2, $src3)", @@ -3129,17 +3138,20 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), // Logical-logical words. 
// Rx=or(Ru,and(Rx,#s10)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT in def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), "$dst = or($src1, and($src2, #$src3))", [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ImmPred:$src3)))], + s10ExtPred:$src3)))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx[&|^]=and(Rs,Rt) // Rx&=and(Rs,Rt) +let validSubTargets = HasV4SubT in def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= and($src2, $src3)", @@ -3150,6 +3162,7 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=and(Rs,Rt) +let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= and($src2, $src3)", @@ -3157,9 +3170,10 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Rx^=and(Rs,Rt) +let validSubTargets = HasV4SubT in def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= and($src2, $src3)", @@ -3171,6 +3185,7 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), // Rx[&|^]=and(Rs,~Rt) // Rx&=and(Rs,~Rt) +let validSubTargets = HasV4SubT in def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= and($src2, ~$src3)", @@ -3181,6 +3196,7 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=and(Rs,~Rt) +let validSubTargets = HasV4SubT in def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= and($src2, ~$src3)", @@ -3191,6 +3207,7 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx^=and(Rs,~Rt) +let validSubTargets = HasV4SubT in def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= and($src2, ~$src3)", @@ -3202,6 +3219,7 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), // Rx[&|^]=or(Rs,Rt) // Rx&=or(Rs,Rt) +let validSubTargets = HasV4SubT in def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= or($src2, $src3)", @@ -3212,6 +3230,7 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=or(Rs,Rt) +let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= or($src2, $src3)", @@ -3219,9 +3238,10 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Rx^=or(Rs,Rt) +let validSubTargets = HasV4SubT in def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= or($src2, $src3)", @@ -3233,6 +3253,7 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), // Rx[&|^]=xor(Rs,Rt) // Rx&=xor(Rs,Rt) +let validSubTargets = HasV4SubT in def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst 
&= xor($src2, $src3)", @@ -3243,6 +3264,7 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=xor(Rs,Rt) +let validSubTargets = HasV4SubT in def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= xor($src2, $src3)", @@ -3253,6 +3275,7 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx^=xor(Rs,Rt) +let validSubTargets = HasV4SubT in def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= xor($src2, $src3)", @@ -3263,24 +3286,28 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=and(Rs,#s10) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), "$dst |= and($src2, #$src3)", [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ImmPred:$src3)))], + s10ExtPred:$src3)))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Rx|=or(Rs,#s10) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), "$dst |= or($src2, #$src3)", [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ImmPred:$src3)))], + s10ExtPred:$src3)))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Modulo wrap @@ -3327,25 +3354,41 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), // Multiply and user lower result. 
// Rd=add(#u6,mpyi(Rs,#U6)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3), + (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3), "$dst = add(#$src1, mpyi($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - u6ImmPred:$src1))]>, + u6ExtPred:$src1))]>, Requires<[HasV4T]>; -// Rd=add(#u6,mpyi(Rs,Rt)) +// Rd=add(##,mpyi(Rs,#U6)) +def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), + (HexagonCONST32 tglobaladdr:$src1)), + (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2, + u6ImmPred:$src3))>; +// Rd=add(#u6,mpyi(Rs,Rt)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, +validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), - (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3), + (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = add(#$src1, mpyi($src2, $src3))", [(set (i32 IntRegs:$dst), (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - u6ImmPred:$src1))]>, - Requires<[HasV4T]>; + u6ExtPred:$src1))]>, + Requires<[HasV4T]>, ImmRegRel; + +// Rd=add(##,mpyi(Rs,Rt)) +def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (HexagonCONST32 tglobaladdr:$src1)), + (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2, + IntRegs:$src3))>; // Rd=add(Ru,mpyi(#u6:2,Rs)) +let validSubTargets = HasV4SubT in def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), "$dst = add($src1, mpyi(#$src2, $src3))", @@ -3355,15 +3398,18 @@ def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rd=add(Ru,mpyi(Rs,#u6)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6, +validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3), "$dst = add($src1, mpyi($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u6ImmPred:$src3)))]>, - Requires<[HasV4T]>; + u6ExtPred:$src3)))]>, + Requires<[HasV4T]>, ImmRegRel; // Rx=add(Ru,mpyi(Rx,Rs)) +let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = add($src1, mpyi($src2, $src3))", @@ -3371,7 +3417,7 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], "$src2 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Polynomial multiply words @@ -3414,92 +3460,107 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), // Shift by immediate and accumulate. 
// Rx=add(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = add(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx=add(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = add(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx=sub(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = sub(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx=sub(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = sub(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Shift by immediate and logical. 
//Rx=and(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = and(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=and(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = and(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=or(#u8,asl(Rx,#U5)) -let AddedComplexity = 30 in +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +AddedComplexity = 30, validSubTargets = HasV4SubT in def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = or(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=or(#u8,lsr(Rx,#U5)) -let AddedComplexity = 30 in +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +AddedComplexity = 30, validSubTargets = HasV4SubT in def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = or(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Shift by register. 
//Rd=lsl(#s6,Rt) +let validSubTargets = HasV4SubT in { def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), "$dst = lsl(#$src1, $src2)", [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1, @@ -3547,7 +3608,7 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; - +} //===----------------------------------------------------------------------===// // XTYPE/SHIFT - @@ -3990,7 +4051,7 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.eq($src1, $src2)", @@ -4000,7 +4061,7 @@ def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.eq($src1, $src2)", @@ -4010,7 +4071,7 @@ def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.gt(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.gt($src1, $src2)", @@ -4020,13 +4081,14 @@ def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.gtu(Rs,#u7) -let isCompare = 1 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, +isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Imm:$src2), + (ins IntRegs:$src1, u7Ext:$src2), "$dst = cmpb.gtu($src1, #$src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), - u7ImmPred:$src2))]>, - Requires<[HasV4T]>; + u7ExtPred:$src2))]>, + Requires<[HasV4T]>, ImmRegRel; // Pd=cmpb.gtu(Rs,Rt) let isCompare = 1 in -- cgit v1.1 From 9c3846c99c8dd2aeddf5084ce4b8725c230216a0 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 29 Jan 2013 18:42:41 +0000 Subject: Use multiclass for post-increment store instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173816 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 4 +- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 9 +- lib/Target/Hexagon/HexagonInstrInfo.td | 167 ++++++---------- lib/Target/Hexagon/HexagonInstrInfoV4.td | 291 ++++++---------------------- 4 files changed, 125 insertions(+), 346 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index db292f2..381032b 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -608,8 +608,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Offset value must be within representable range // and must have correct alignment properties. if (TII->isValidAutoIncImm(StoredVT, Val)) { - SDValue Ops[] = { Value, Base, - CurDAG->getTargetConstant(Val, MVT::i32), Chain}; + SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, + Chain}; unsigned Opcode = 0; // Figure out the post inc version of opcode. 
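(A reading aid, not patch content.) The operand swap in SelectIndexedStore above follows from the machine-operand order of the post-increment store multiclass introduced later in this patch, which lists the base register and offset before the stored value:

// POST_STbri, as instantiated below:
//   (outs IntRegs:$dst), (ins IntRegs:$src1, s4_0Imm:$offset, IntRegs:$src2)
// hence the selector now builds Ops as {Base, Offset, Value, Chain}
// instead of {Value, Base, Offset, Chain}.
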
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 05f1e23..29973e7 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -46,11 +46,10 @@ class NVInstPost_V4 pattern, } // Post increment ST Instruction. -class NVInstPI_V4 pattern, - string cstr> - : NVInstPost_V4 { - let rt{0-4} = 0; -} +let mayStore = 1 in +class NVInstPI_V4 pattern = [], + string cstr = ""> + : NVInstPost_V4; class MEMInst_V4 pattern> : InstHexagon { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 7fd28c8..11c0167 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -1419,35 +1419,71 @@ def STd_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let hasCtrlDep = 1, isPredicable = 1 in -def POST_STdri : STInstPI<(outs IntRegs:$dst), - (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset), - "memd($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2), - s4_3ImmPred:$offset))], - "$src2 = $dst">; +//===----------------------------------------------------------------------===// +// Post increment store +//===----------------------------------------------------------------------===// -// if ([!]Pv) memd(Rx++#s4:3)=Rtt -// if (Pv) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, - isPredicated = 1 in -def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if ($src1) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">; - -// if (!Pv) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, isPredicated = 1, - isPredicated = 1 in -def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if (!$src1) memd($src3++#$offset) = $src2", +multiclass ST_PostInc_Pbase { + let PNewValue = !if(isPredNew, "new", "") in + def NAME : STInst2PI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3", [], - "$src3 = $dst">; + "$src2 = $dst">; +} + +multiclass ST_PostInc_Pred { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME# : ST_PostInc_Pbase; + // Predicate new + let Predicates = [HasV4T], validSubTargets = HasV4SubT in + defm _cdn#NAME#_V4 : ST_PostInc_Pbase; + } +} + +let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in +multiclass ST_PostInc { + + let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def NAME : STInst2PI<(outs IntRegs:$dst), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + #mnemonic#"($src1++#$offset) = $src2", + [], + "$src1 = $dst">; + + let isPredicated = 1 in { + defm Pt : ST_PostInc_Pred; + defm NotPt : ST_PostInc_Pred; + } + } +} + +defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; +defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel; +defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; + +let isNVStorable = 0 in +defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel; + +def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2, + s4_3ImmPred:$offset), + (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>; + +def : Pat<(post_truncsti16 (i32 IntRegs:$src1), 
IntRegs:$src2, + s4_3ImmPred:$offset), + (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; + +def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset), + (POST_STwri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; + +def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2, + s4_3ImmPred:$offset), + (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>; //===----------------------------------------------------------------------===// // multiclass for the store instructions with MEMri operand. @@ -1609,32 +1645,6 @@ def STb_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -// memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, isPredicable = 1 in -def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, - IntRegs:$src2, - s4Imm:$offset), - "memb($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2), - s4_0ImmPred:$offset))], - "$src2 = $dst">; - -// if ([!]Pv) memb(Rx++#s4:0)=Rt -// if (Pv) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">; - -// if (!Pv) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">; - let neverHasSideEffects = 1 in def STrih_GP : STInst2<(outs), (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), @@ -1650,31 +1660,6 @@ def STh_GP : STInst2<(outs), Requires<[NoV4T]>; // memh(Rx++#s4:1)=Rt.H -// memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, isPredicable = 1 in -def POST_SThri : STInstPI<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), - "memh($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2), - s4_1ImmPred:$offset))], - "$src2 = $dst">; - -// if ([!]Pv) memh(Rx++#s4:1)=Rt -// if (Pv) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">; - -// if (!Pv) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">; - // Store word. // Store predicate. 
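(An expansion sketch inferred from the multiclass names above; the patch itself only spells out the multiclass.) The hand-written definitions deleted in these hunks reappear as instances of ST_PostInc under their old names, e.g. for the byte store:

//   POST_STbri                memb(Rx++#s4:0)=Rt
//   POST_STbri_cPt            if (Pv) memb(Rx++#s4:0)=Rt
//   POST_STbri_cNotPt         if (!Pv) memb(Rx++#s4:0)=Rt
//   POST_STbri_cdnPt_V4       if (Pv.new) memb(Rx++#s4:0)=Rt
//   POST_STbri_cdnNotPt_V4    if (!Pv.new) memb(Rx++#s4:0)=Rt
//
// so the predicated and predicate-new forms keep their old names while
// sharing a single definition.
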
@@ -1698,32 +1683,6 @@ def STw_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let hasCtrlDep = 1, isPredicable = 1 in -def POST_STwri : STInstPI<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), - "memw($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2), - s4_2ImmPred:$offset))], - "$src2 = $dst">; - -// if ([!]Pv) memw(Rx++#s4:2)=Rt -// if (Pv) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">; - -// if (!Pv) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">; - - - // Allocate stack frame. let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { def ALLOCFRAME : STInst2<(outs), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 1afddc7..2e389c6 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -1652,34 +1652,12 @@ def STrid_shl_V4 : STInst<(outs), // if ([!]Pv[.new]) memd(#u6)=Rtt // TODO: needs to be implemented. -// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt -// if (Pv) memd(Rx++#s4:3)=Rtt -// if (Pv.new) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, - isPredicated = 1 in -def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if ($src1.new) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memd(Rx++#s4:3)=Rtt -// if (!Pv.new) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, - isPredicated = 1 in -def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if (!$src1.new) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">, - Requires<[HasV4T]>; - - +//===----------------------------------------------------------------------===// // multiclass for store instructions with base + immediate offset // addressing mode and immediate stored value. +// mem[bhw](Rx++#s4:3)=#s8 +// if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6 +//===----------------------------------------------------------------------===// multiclass ST_Imm_Pbase { let PNewValue = !if(isPredNew, "new", "") in @@ -1718,9 +1696,9 @@ multiclass ST_Imm { let addrMode = BaseImmOffset, InputType = "imm", validSubTargets = HasV4SubT in { - defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel; - defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel; - defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel; + defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel; + defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel; + defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel; } let Predicates = [HasV4T], AddedComplexity = 10 in { @@ -1757,30 +1735,6 @@ def STrib_shl_V4 : STInst<(outs), // memb(gp+#u16:0)=Rt -// Store byte conditionally. 
-// if ([!]Pv[.new]) memb(#u6)=Rt -// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt -// if (Pv) memb(Rx++#s4:0)=Rt -// if (Pv.new) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1.new) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memb(Rx++#s4:0)=Rt -// if (!Pv.new) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1.new) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - // Store halfword. // TODO: needs to be implemented // memh(Re=#U6)=Rt.H @@ -1823,28 +1777,6 @@ def STrih_shl_V4 : STInst<(outs), // if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H // TODO: Needs to be implemented. -// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt -// if (Pv) memh(Rx++#s4:1)=Rt -// if (Pv.new) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1.new) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memh(Rx++#s4:1)=Rt -// if (!Pv.new) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1.new) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - // Store word. // memw(Re=#U6)=Rt // TODO: Needs to be implemented. @@ -1879,29 +1811,6 @@ def STriw_shl_V4 : STInst<(outs), // memw(Rx++Mu:brev)=Rt // memw(gp+#u16:2)=Rt - -// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt -// if (Pv) memw(Rx++#s4:2)=Rt -// if (Pv.new) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1.new) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memw(Rx++#s4:2)=Rt -// if (!Pv.new) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1.new) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - /// store to global address let isPredicable = 1, neverHasSideEffects = 1 in @@ -2463,14 +2372,58 @@ def STrib_shl_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in -def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset), - "memb($src2++#$offset) = $src1.new", +//===----------------------------------------------------------------------===// +// Post increment store +// mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// + +multiclass ST_PostInc_Pbase_nv { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3.new", [], "$src2 = $dst">, Requires<[HasV4T]>; +} + 
+multiclass ST_PostInc_Pred_nv { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_PostInc_Pbase_nv; + // Predicate new + let Predicates = [HasV4T], validSubTargets = HasV4SubT in + defm _cdn#NAME : ST_PostInc_Pbase_nv; + } +} + +let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in +multiclass ST_PostInc_nv { + + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2.new", + [], + "$src1 = $dst">, + Requires<[HasV4T]>; + + let isPredicated = 1 in { + defm Pt : ST_PostInc_Pred_nv; + defm NotPt : ST_PostInc_Pred_nv; + } + } +} + +let validSubTargets = HasV4SubT in { +defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; +defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel; +defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; +} // memb(Rx++#s4:0:circ(Mu))=Nt.new // memb(Rx++I:circ(Mu))=Nt.new @@ -2493,43 +2446,6 @@ def STb_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new -// if (Pv) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (Pv.new) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1.new) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv.new) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1.new) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - // memh(Ru<<#u2+#U6)=Nt.new let mayStore = 1, AddedComplexity = 10 in def STrih_shl_nv_V4 : NVInst_V4<(outs), @@ -2538,15 +2454,6 @@ def STrih_shl_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in -def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset), - "memh($src2++#$offset) = $src1.new", - [], - "$src2 = $dst">, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new // memh(Rx++Mu)=Nt.new @@ -2568,44 +2475,6 @@ def STh_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; - -// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new -// if (Pv) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (Pv.new) 
memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1.new) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv.new) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1.new) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - // memw(Ru<<#u2+#U6)=Nt.new let mayStore = 1, AddedComplexity = 10 in def STriw_shl_nv_V4 : NVInst_V4<(outs), @@ -2614,15 +2483,6 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in -def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset), - "memw($src2++#$offset) = $src1.new", - [], - "$src2 = $dst">, - Requires<[HasV4T]>; - // memw(Rx++#s4:2:circ(Mu))=Nt.new // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new @@ -2642,45 +2502,6 @@ def STw_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new -// if (Pv) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (Pv.new) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1.new) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv.new) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1.new) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - - // if (Pv) memb(##global) = Rt let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), -- cgit v1.1 From aaf2e639ecf2e5a1647e6d59ef53dc76da0a6102 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 29 Jan 2013 19:35:24 +0000 Subject: Support artificial parameters in function types. Provides the functionality for Clang change r172911 - I just had this still lying around. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173820 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 2e4ed5d..f0655e6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -913,6 +913,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } else { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); addType(Arg, DIType(Ty)); + if (DIType(Ty).isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); Buffer.addChild(Arg); } } -- cgit v1.1 From 9f175f88190fae22df9c6e68af618d0493477ff9 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 29 Jan 2013 20:37:10 +0000 Subject: s/Data/Kind/g. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173827 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 15 +++++++++------ lib/IR/Attributes.cpp | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 91f5005..7be5a16 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -31,15 +31,15 @@ class LLVMContext; /// could be a single enum, a tuple, or a string. class AttributeImpl : public FoldingSetNode { LLVMContext &Context; - Constant *Data; + Constant *Kind; SmallVector Vals; // AttributesImpl is uniqued, these should not be publicly available. void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; public: - AttributeImpl(LLVMContext &C, Constant *Data) - : Context(C), Data(Data) {} + AttributeImpl(LLVMContext &C, Constant *Kind) + : Context(C), Kind(Kind) {} explicit AttributeImpl(LLVMContext &C, Attribute::AttrKind data); AttributeImpl(LLVMContext &C, Attribute::AttrKind data, ArrayRef values); @@ -48,6 +48,9 @@ public: bool hasAttribute(Attribute::AttrKind A) const; bool hasAttributes() const; + Constant *getAttributeKind() const { return Kind; } + ArrayRef getAttributeValues() const { return Vals; } + LLVMContext &getContext() { return Context; } ArrayRef getValues() const { return Vals; } @@ -63,11 +66,11 @@ public: bool operator<(const AttributeImpl &AI) const; void Profile(FoldingSetNodeID &ID) const { - Profile(ID, Data, Vals); + Profile(ID, Kind, Vals); } - static void Profile(FoldingSetNodeID &ID, Constant *Data, + static void Profile(FoldingSetNodeID &ID, Constant *Kind, ArrayRef Vals) { - ID.AddPointer(Data); + ID.AddPointer(Kind); for (unsigned I = 0, E = Vals.size(); I != E; ++I) ID.AddPointer(Vals[I]); } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 2cf7621..a3f62ae 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -195,20 +195,20 @@ uint64_t Attribute::Raw() const { // AttributeImpl Definition //===----------------------------------------------------------------------===// -AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind data) +AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind kind) : Context(C) { - Data = ConstantInt::get(Type::getInt64Ty(C), data); + Kind = ConstantInt::get(Type::getInt64Ty(C), kind); } -AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind data, +AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind kind, ArrayRef values) : Context(C) { - 
Data = ConstantInt::get(Type::getInt64Ty(C), data); + Kind = ConstantInt::get(Type::getInt64Ty(C), kind); Vals.reserve(values.size()); Vals.append(values.begin(), values.end()); } -AttributeImpl::AttributeImpl(LLVMContext &C, StringRef data) +AttributeImpl::AttributeImpl(LLVMContext &C, StringRef kind) : Context(C) { - Data = ConstantDataArray::getString(C, data); + Kind = ConstantDataArray::getString(C, kind); } bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { @@ -229,36 +229,36 @@ uint64_t AttributeImpl::getStackAlignment() const { return 1ULL << ((Mask >> 26) - 1); } -bool AttributeImpl::operator==(Attribute::AttrKind Kind) const { - if (ConstantInt *CI = dyn_cast(Data)) - return CI->getZExtValue() == Kind; +bool AttributeImpl::operator==(Attribute::AttrKind kind) const { + if (ConstantInt *CI = dyn_cast(Kind)) + return CI->getZExtValue() == kind; return false; } -bool AttributeImpl::operator!=(Attribute::AttrKind Kind) const { - return !(*this == Kind); +bool AttributeImpl::operator!=(Attribute::AttrKind kind) const { + return !(*this == kind); } -bool AttributeImpl::operator==(StringRef Kind) const { - if (ConstantDataArray *CDA = dyn_cast(Data)) +bool AttributeImpl::operator==(StringRef kind) const { + if (ConstantDataArray *CDA = dyn_cast(Kind)) if (CDA->isString()) - return CDA->getAsString() == Kind; + return CDA->getAsString() == kind; return false; } -bool AttributeImpl::operator!=(StringRef Kind) const { - return !(*this == Kind); +bool AttributeImpl::operator!=(StringRef kind) const { + return !(*this == kind); } bool AttributeImpl::operator<(const AttributeImpl &AI) const { - if (!Data && !AI.Data) return false; - if (!Data && AI.Data) return true; - if (Data && !AI.Data) return false; + if (!Kind && !AI.Kind) return false; + if (!Kind && AI.Kind) return true; + if (Kind && !AI.Kind) return false; - ConstantInt *ThisCI = dyn_cast(Data); - ConstantInt *ThatCI = dyn_cast(AI.Data); + ConstantInt *ThisCI = dyn_cast(Kind); + ConstantInt *ThatCI = dyn_cast(AI.Kind); - ConstantDataArray *ThisCDA = dyn_cast(Data); - ConstantDataArray *ThatCDA = dyn_cast(AI.Data); + ConstantDataArray *ThisCDA = dyn_cast(Kind); + ConstantDataArray *ThatCDA = dyn_cast(AI.Kind); if (ThisCI && ThatCI) return ThisCI->getZExtValue() < ThatCI->getZExtValue(); @@ -274,7 +274,7 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const { uint64_t AttributeImpl::Raw() const { // FIXME: Remove this. - return cast(Data)->getZExtValue(); + return cast(Kind)->getZExtValue(); } uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { -- cgit v1.1 From 6dc3781d44e56f0addf28b06232a50f3f9e6b1af Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 29 Jan 2013 20:45:34 +0000 Subject: Add a couple of accessor methods to get the kind and values of an attribute. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173828 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index a3f62ae..1a97110 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -83,6 +83,14 @@ bool Attribute::hasAttributes() const { return pImpl && pImpl->hasAttributes(); } +Constant *Attribute::getAttributeKind() const { + return pImpl ? pImpl->getAttributeKind() : 0; +} + +ArrayRef Attribute::getAttributeValues() const { + return pImpl ? pImpl->getAttributeValues() : ArrayRef(); +} + /// This returns the alignment field of an attribute as a byte alignment value. 
unsigned Attribute::getAlignment() const { if (!hasAttribute(Attribute::Alignment)) -- cgit v1.1 From 82b83011a1e330e41147dbad97e44939840ba755 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 21:00:52 +0000 Subject: Changed DoesObjCBlockEscape => DoesRetainableObjPtrEscape so I can use it to perform escape analysis of other retainable object pointers in other locations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173829 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index e968c8b..727b4f7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -171,33 +171,36 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return 0; } -/// \brief Test whether the given pointer, which is an Objective C block -/// pointer, does not "escape". +/// \brief Test whether the given retainable object pointer escapes. /// /// This differs from regular escape analysis in that a use as an /// argument to a call is not considered an escape. /// -static bool DoesObjCBlockEscape(const Value *BlockPtr) { +static bool DoesRetainableObjPtrEscape(const User *Ptr) { - DEBUG(dbgs() << "DoesObjCBlockEscape: Target: " << *BlockPtr << "\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n"); // Walk the def-use chains. SmallVector Worklist; - Worklist.push_back(BlockPtr); + Worklist.push_back(Ptr); + // If Ptr has any operands add them as well. + for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E; ++I) { + Worklist.push_back(*I); + } // Ensure we do not visit any value twice. - SmallPtrSet VisitedSet; + SmallPtrSet VisitedSet; do { const Value *V = Worklist.pop_back_val(); - DEBUG(dbgs() << "DoesObjCBlockEscape: Visiting: " << *V << "\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n"); for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const User *UUser = *UI; - DEBUG(dbgs() << "DoesObjCBlockEscape: User: " << *UUser << "\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n"); // Special - Use by a call (callee or argument) is not considered // to be an escape. @@ -207,7 +210,7 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { case IC_StoreStrong: case IC_Autorelease: case IC_AutoreleaseRV: { - DEBUG(dbgs() << "DoesObjCBlockEscape: User copies pointer arguments. " + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer arguments. " "Block Escapes!\n"); // These special functions make copies of their pointer arguments. return true; @@ -220,11 +223,11 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { isa(UUser) || isa(UUser)) { if (!VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "DoesObjCBlockEscape: User copies value. Escapes " + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. Escapes " "if result escapes. Adding to list.\n"); Worklist.push_back(UUser); } else { - DEBUG(dbgs() << "DoesObjCBlockEscape: Already visited node.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node.\n"); } continue; } @@ -241,13 +244,13 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { continue; } // Otherwise, conservatively assume an escape. 
- DEBUG(dbgs() << "DoesObjCBlockEscape: Assuming block escapes.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming block escapes.\n"); return true; } } while (!Worklist.empty()); // No escapes found. - DEBUG(dbgs() << "DoesObjCBlockEscape: Block does not escape.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Block does not escape.\n"); return false; } @@ -822,7 +825,7 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { // If the pointer "escapes" (not including being used in a call), // the copy may be needed. - if (DoesObjCBlockEscape(Inst)) + if (DoesRetainableObjPtrEscape(Inst)) return false; // Otherwise, it's not needed. -- cgit v1.1 From a519c97b4278970b7104005205c6f42910cb9acb Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 21:07:51 +0000 Subject: Added some periods to some comments and added an overload for operator<< for type Sequence so I can print out Sequences in debug statements. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173831 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 727b4f7..068da13 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -312,13 +312,35 @@ namespace { /// objc_retain and objc_release are actually needed. enum Sequence { S_None, - S_Retain, ///< objc_retain(x) - S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement - S_Use, ///< any use of x - S_Stop, ///< like S_Release, but code motion is stopped - S_Release, ///< objc_release(x) - S_MovableRelease ///< objc_release(x), !clang.imprecise_release + S_Retain, ///< objc_retain(x). + S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement. + S_Use, ///< any use of x. + S_Release, ///< objc_release(x). + S_MovableRelease, ///< objc_release(x), !clang.imprecise_release. + S_Stop ///< like S_Release, but code motion is stopped. }; + + raw_ostream &operator<<(raw_ostream &OS, const Sequence S) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<<(raw_ostream &OS, const Sequence S) { + switch (S) { + case S_None: + return OS << "S_None"; + case S_Retain: + return OS << "S_Retain"; + case S_CanRelease: + return OS << "S_CanRelease"; + case S_Use: + return OS << "S_Use"; + case S_Release: + return OS << "S_Release"; + case S_MovableRelease: + return OS << "S_MovableRelease"; + case S_Stop: + return OS << "S_Stop"; + } + llvm_unreachable("Unknown sequence type."); + } } static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { -- cgit v1.1 From 02c953ea8b8130bd60fdacb97649d6344f1e8143 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 21:07:53 +0000 Subject: Fixed a few debug messages and some 80+ violations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173832 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 068da13..c082701 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -177,14 +177,14 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { /// argument to a call is not considered an escape. 
/// static bool DoesRetainableObjPtrEscape(const User *Ptr) { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n"); // Walk the def-use chains. SmallVector Worklist; Worklist.push_back(Ptr); // If Ptr has any operands add them as well. - for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E; ++I) { + for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E; + ++I) { Worklist.push_back(*I); } @@ -210,8 +210,8 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { case IC_StoreStrong: case IC_Autorelease: case IC_AutoreleaseRV: { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer arguments. " - "Block Escapes!\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer " + "arguments. Pointer Escapes!\n"); // These special functions make copies of their pointer arguments. return true; } @@ -223,11 +223,12 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { isa(UUser) || isa(UUser)) { if (!VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. Escapes " - "if result escapes. Adding to list.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. " + "Ptr escapes if result escapes. Adding to list.\n"); Worklist.push_back(UUser); } else { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node." + "\n"); } continue; } @@ -244,13 +245,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { continue; } // Otherwise, conservatively assume an escape. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming block escapes.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n"); return true; } } while (!Worklist.empty()); // No escapes found. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Block does not escape.\n"); + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n"); return false; } -- cgit v1.1 From f662a59b8b031bd43e43e0282b58bef920f0793d Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Tue, 29 Jan 2013 21:18:43 +0000 Subject: Add a special handling case for untyped CopyFromReg node in GetCostForDef() of ScheduleDAGRRList git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173833 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 31b9bf3..10d1adf 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" @@ -274,8 +275,17 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // the expansion of custom DAG-to-DAG patterns. if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); - unsigned Opcode = Node->getMachineOpcode(); + // Special handling for CopyFromReg of untyped values. 
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Opcode = Node->getMachineOpcode(); if (Opcode == TargetOpcode::REG_SEQUENCE) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); -- cgit v1.1 From b82d200dcb748619e22491ecfb9662ae86d7dfd5 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 21:39:02 +0000 Subject: Moved S_Stop back to its previous position in the sequence order. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173834 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index c082701..dbb2cc8 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -316,9 +316,9 @@ namespace { S_Retain, ///< objc_retain(x). S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement. S_Use, ///< any use of x. + S_Stop, ///< like S_Release, but code motion is stopped. S_Release, ///< objc_release(x). S_MovableRelease, ///< objc_release(x), !clang.imprecise_release. - S_Stop ///< like S_Release, but code motion is stopped. }; raw_ostream &operator<<(raw_ostream &OS, const Sequence S) -- cgit v1.1 From 485da648484e443f38c8ddbe5cb8ace07e2166e7 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 21:41:44 +0000 Subject: Removed trailing comma in last element of enum declaration. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173836 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index dbb2cc8..e6cd1a7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -318,7 +318,7 @@ namespace { S_Use, ///< any use of x. S_Stop, ///< like S_Release, but code motion is stopped. S_Release, ///< objc_release(x). - S_MovableRelease, ///< objc_release(x), !clang.imprecise_release. + S_MovableRelease ///< objc_release(x), !clang.imprecise_release. }; raw_ostream &operator<<(raw_ostream &OS, const Sequence S) -- cgit v1.1 From d8550495768d87f9b0d3219c40a5b15518ccacc8 Mon Sep 17 00:00:00 2001 From: Pekka Jaaskelainen Date: Tue, 29 Jan 2013 21:42:08 +0000 Subject: LoopVectorize: convert TinyTripCountVectorThreshold constant to a command line switch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173837 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 16eb21e..1b242c9 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -101,7 +101,9 @@ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); /// We don't vectorize loops with a known constant trip count below this number.
-static const unsigned TinyTripCountVectorThreshold = 16; +static cl::opt +TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, + cl::desc("The minimum trip count in the loops to vectorize.")); /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -- cgit v1.1 From 0933134a304b47d3767aad202df9f0e09743da6d Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 29 Jan 2013 22:10:07 +0000 Subject: [MC][COFF] Delay handling symbol aliases when writing Fixes PR14447 and PR9034. Patch by Nico Rieck! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173839 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/WinCOFFObjectWriter.cpp | 26 ++++++++++++++++---------- lib/MC/WinCOFFStreamer.cpp | 39 --------------------------------------- 2 files changed, 16 insertions(+), 49 deletions(-) (limited to 'lib') diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index e1d6538..fc7aa94 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -147,7 +147,9 @@ public: object_t *createCOFFEntity(StringRef Name, list_t &List); void DefineSection(MCSectionData const &SectionData); - void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler); + void DefineSymbol(MCSymbol const &Symbol, + MCSymbolData const &SymbolData, + MCAssembler &Assembler); void MakeSymbolReal(COFFSymbol &S, size_t Index); void MakeSectionReal(COFFSection &S, size_t Number); @@ -408,9 +410,10 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) { /// This function takes a section data object from the assembler /// and creates the associated COFF symbol staging object. -void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, +void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol, + MCSymbolData const &SymbolData, MCAssembler &Assembler) { - COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol()); + COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0; coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16; @@ -418,9 +421,9 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, if (SymbolData.getFlags() & COFF::SF_WeakExternal) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; - if (SymbolData.getSymbol().isVariable()) { + if (Symbol.isVariable()) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; - const MCExpr *Value = SymbolData.getSymbol().getVariableValue(); + const MCExpr *Value = Symbol.getVariableValue(); // FIXME: This assert message isn't very good. assert(Value->getKind() == MCExpr::SymbolRef && @@ -428,10 +431,10 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, const MCSymbolRefExpr *SymbolRef = static_cast(Value); - coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol()); + coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol); } else { std::string WeakName = std::string(".weak.") - + SymbolData.getSymbol().getName().str() + + Symbol.getName().str() + ".default"; COFFSymbol *WeakDefault = createSymbol(WeakName); WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE; @@ -464,7 +467,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, // Bind internal COFF symbol to MC symbol. 
coff_symbol->MCData = &SymbolData; - SymbolMap[&SymbolData.getSymbol()] = coff_symbol; + SymbolMap[&Symbol] = coff_symbol; } /// making a section real involves assigned it a number and putting @@ -619,8 +622,11 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), e = Asm.symbol_end(); i != e; i++) { - if (ExportSymbol(*i, Asm)) - DefineSymbol(*i, Asm); + if (ExportSymbol(*i, Asm)) { + const MCSymbol &Alias = i->getSymbol(); + const MCSymbol &Symbol = Alias.AliasedSymbol(); + DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm); + } } } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index cc2c272..b529489b 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -55,7 +55,6 @@ public: virtual void EmitDebugLabel(MCSymbol *Symbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitThumbFunc(MCSymbol *Func); - virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol); @@ -201,44 +200,6 @@ void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { llvm_unreachable("not implemented"); } -void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { - assert((Symbol->isInSection() - ? Symbol->getSection().getVariant() == MCSection::SV_COFF - : true) && "Got non COFF section in the COFF backend!"); - // FIXME: This is all very ugly and depressing. What needs to happen here - // depends on quite a few things that are all part of relaxation, which we - // don't really even do. - - if (Value->getKind() != MCExpr::SymbolRef) { - MCObjectStreamer::EmitAssignment(Symbol, Value); - } else { - // FIXME: This is a horrible way to do this :(. This should really be - // handled after we are done with the MC* objects and immediately before - // writing out the object file when we know exactly what the symbol should - // look like in the coff symbol table. I'm not doing that now because the - // COFF object writer doesn't have a clearly defined separation between MC - // data structures, the object writers data structures, and the raw, POD, - // data structures that get written to disk. - - // Copy over the aliased data. - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData( - dyn_cast(Value)->getSymbol()); - - // FIXME: This is particularly nasty because it breaks as soon as any data - // members of MCSymbolData change. - SD.CommonAlign = RealSD.CommonAlign; - SD.CommonSize = RealSD.CommonSize; - SD.Flags = RealSD.Flags; - SD.Fragment = RealSD.Fragment; - SD.Index = RealSD.Index; - SD.IsExternal = RealSD.IsExternal; - SD.IsPrivateExtern = RealSD.IsPrivateExtern; - SD.Offset = RealSD.Offset; - SD.SymbolSize = RealSD.SymbolSize; - } -} - void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { assert(Symbol && "Symbol must be non-null!"); -- cgit v1.1 From 5500943771f53340e8f219496b512286e4f5b014 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 29 Jan 2013 22:29:59 +0000 Subject: Made certain small functions in PtrState inlined. 
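These are all one-line accessors; note that member functions defined inside a class body are implicitly inline in C++, so the keyword mostly documents the intent. A minimal sketch of the pattern (hypothetical type, not the real PtrState declaration):

struct SeqHolder {
  // Defined in-class, so implicitly inline even without the keyword;
  // the explicit 'inline' only makes the expectation visible.
  inline void SetSeq(int NewSeq) { Seq = NewSeq; }
  inline int GetSeq() const { return Seq; }
private:
  int Seq;
};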
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173842 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index e6cd1a7..3bfb003 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -451,31 +451,31 @@ namespace { PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} - void SetKnownPositiveRefCount() { + inline void SetKnownPositiveRefCount() { KnownPositiveRefCount = true; } - void ClearRefCount() { + inline void ClearRefCount() { KnownPositiveRefCount = false; } - bool IsKnownIncremented() const { + inline bool IsKnownIncremented() const { return KnownPositiveRefCount; } - void SetSeq(Sequence NewSeq) { + inline void SetSeq(Sequence NewSeq) { Seq = NewSeq; } - Sequence GetSeq() const { + inline Sequence GetSeq() const { return Seq; } - void ClearSequenceProgress() { + inline void ClearSequenceProgress() { ResetSequenceProgress(S_None); } - void ResetSequenceProgress(Sequence NewSeq) { + inline void ResetSequenceProgress(Sequence NewSeq) { Seq = NewSeq; Partial = false; RRI.clear(); -- cgit v1.1 From 76122f9c13e7b840687de2b8d5767c73dac02a3b Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 29 Jan 2013 23:27:45 +0000 Subject: Remove unused variable (unused since r173839) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173847 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/WinCOFFObjectWriter.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index fc7aa94..6dffed7 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -423,14 +423,11 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol, if (Symbol.isVariable()) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; - const MCExpr *Value = Symbol.getVariableValue(); // FIXME: This assert message isn't very good. - assert(Value->getKind() == MCExpr::SymbolRef && + assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef && "Value must be a SymbolRef!"); - const MCSymbolRefExpr *SymbolRef = - static_cast<const MCSymbolRefExpr*>(Value); coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol); } else { std::string WeakName = std::string(".weak.") -- cgit v1.1 From 0261cea689c71a15175faf37fdc6bd1d9f69c46e Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Tue, 29 Jan 2013 23:31:38 +0000 Subject: Adding simple cast cost to ARM Changing ARMBaseTargetMachine to return ARMTargetLowering instead of the generic one (similar to x86 code). Tests showing which instructions were added to cast when necessary or cost zero when not. Downcasts to 16 bits are not lowered in NEON, so costs are not there yet.
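As a rough usage sketch (not one of the committed tests; assumes an LLVMContext Ctx and the TargetTransformInfo analysis TTI are in scope):

// Ask the target for the cost of a vector sign extension. With the NEON
// conversion table below, widening <4 x i16> to <4 x i32> costs nothing
// extra because the instruction performs the cast as part of the operation.
Type *SrcTy = VectorType::get(Type::getInt16Ty(Ctx), 4);  // <4 x i16>
Type *DstTy = VectorType::get(Type::getInt32Ty(Ctx), 4);  // <4 x i32>
unsigned Cost = TTI.getCastInstrCost(Instruction::SExt, DstTy, SrcTy);
// Expected: Cost == 0 for this table entry; conversions with no entry
// fall back to the generic TargetTransformInfo implementation.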
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetMachine.h | 4 +++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 44 +++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index be6bec7..d4caf5c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -46,6 +46,10 @@ public: virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual const ARMTargetLowering *getTargetLowering() const { + // Implemented by derived classes + llvm_unreachable("getTargetLowering not implemented"); + } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 404a6ff..61cb1f6 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" using namespace llvm; // Declare the pass initialization routine locally as target-specific passes @@ -34,18 +35,20 @@ namespace { class ARMTTI : public ImmutablePass, public TargetTransformInfo { const ARMBaseTargetMachine *TM; const ARMSubtarget *ST; + const ARMTargetLowering *TLI; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARMTTI() : ImmutablePass(ID), TM(0), ST(0) { + ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { llvm_unreachable("This pass cannot be directly constructed"); } ARMTTI(const ARMBaseTargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) { + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { initializeARMTTIPass(*PassRegistry::getPassRegistry()); } @@ -111,6 +114,9 @@ public: return 1; } + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + /// @} }; @@ -157,3 +163,37 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { } return 2; } + +unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + // Some arithmetic, load and store operations have specific instructions + // to cast up/down their types automatically at no extra cost + // TODO: Get these tables to know at least what the related operations are + static const TypeConversionCostTblEntry NEONConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + }; + + if (ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONConversionTbl, + array_lengthof(NEONConversionTbl), + ISD, 
DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} -- cgit v1.1 From 544cc21cf4807116251a699d8b1d3d4bace21597 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 30 Jan 2013 00:26:49 +0000 Subject: [mips] Lower EH_RETURN. Patch by Sasa Stankovic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173862 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 32 ++++++++++++++++++ lib/Target/Mips/MipsISelLowering.h | 3 ++ lib/Target/Mips/MipsInstrInfo.td | 21 ++++++++++++ lib/Target/Mips/MipsMachineFunction.cpp | 16 +++++++++ lib/Target/Mips/MipsMachineFunction.h | 16 ++++++++- lib/Target/Mips/MipsSEFrameLowering.cpp | 60 +++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsSEFrameLowering.h | 1 + lib/Target/Mips/MipsSEInstrInfo.cpp | 29 ++++++++++++++++ lib/Target/Mips/MipsSEInstrInfo.h | 2 ++ lib/Target/Mips/MipsSERegisterInfo.cpp | 6 +++- 10 files changed, 184 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 7b8557d..cf8ed35 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -162,6 +162,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::GPRel: return "MipsISD::GPRel"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; + case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; @@ -445,6 +446,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); @@ -1045,6 +1048,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); @@ -2228,6 +2232,34 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT); } +// An EH_RETURN is the result of lowering llvm.eh.return which in turn is +// generated from __builtin_eh_return (offset, handler) +// The effect of this is to adjust the stack pointer by "offset" +// and then branch to "handler". +SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + const { + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + + MipsFI->setCallsEhReturn(); + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + + // Store stack offset in V1, store jump target in V0. Glue CopyToReg and + // EH_RETURN nodes, so that instructions are emitted back-to-back. + unsigned OffsetReg = IsN64 ? 
Mips::V1_64 : Mips::V1; + unsigned AddrReg = IsN64 ? Mips::V0_64 : Mips::V0; + Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); + Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); + return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain, + DAG.getRegister(OffsetReg, Ty), + DAG.getRegister(AddrReg, getPointerTy()), + Chain.getValue(1)); +} + // TODO: set SType according to the desired memory barrier behavior. SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 053f8de..00aa282 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -65,6 +65,8 @@ namespace llvm { // Return Ret, + EH_RETURN, + // MAdd/Sub nodes MAdd, MAddu, @@ -275,6 +277,7 @@ namespace llvm { SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 74f3178..162ffb0 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -867,6 +867,27 @@ def TAILCALL_R : JumpFR, MTLO_FM<8>, IsTailCall; def RET : RetBase, MTLO_FM<8>; +// Exception handling related node and instructions. +// The conversion sequence is: +// ISD::EH_RETURN -> MipsISD::EH_RETURN -> +// MIPSeh_return -> (stack change + indirect branch) +// +// MIPSeh_return takes the place of regular return instruction +// but takes two arguments (V1, V0) which are used for storing +// the offset and return address respectively. +def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def MIPSeh_return32 : MipsPseudo<(outs), (ins CPURegs:$spoff, CPURegs:$dst), + [(MIPSehret CPURegs:$spoff, CPURegs:$dst)]>; + def MIPSeh_return64 : MipsPseudo<(outs), (ins CPU64Regs:$spoff, + CPU64Regs:$dst), + [(MIPSehret CPU64Regs:$spoff, CPU64Regs:$dst)]>; +} + /// Multiply and Divide Instructions. def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 0c71596..59b23f7 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -56,4 +56,20 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() { return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC); } +void MipsFunctionInfo::createEhDataRegsFI() { + for (int I = 0; I < 4; ++I) { + const MipsSubtarget &ST = MF.getTarget().getSubtarget(); + const TargetRegisterClass *RC = ST.isABI_N64() ? 
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + + EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment(), false); + } +} + +bool MipsFunctionInfo::isEhDataRegFI(int FI) const { + return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1] + || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]); +} + void MipsFunctionInfo::anchor() { } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index eb6e1cf..b05b348 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -53,10 +53,16 @@ class MipsFunctionInfo : public MachineFunctionInfo { /// Size of incoming argument area. unsigned IncomingArgSize; + /// CallsEhReturn - Whether the function calls llvm.eh.return. + bool CallsEhReturn; + + /// Frame objects for spilling eh data registers. + int EhDataRegFI[4]; + public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), - VarArgsFrameIndex(0) + VarArgsFrameIndex(0), CallsEhReturn(false) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } @@ -78,6 +84,14 @@ public: } unsigned getIncomingArgSize() const { return IncomingArgSize; } + + bool callsEhReturn() const { return CallsEhReturn; } + void setCallsEhReturn() { CallsEhReturn = true; } + + void createEhDataRegsFI(); + int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } + bool isEhDataRegFI(int FI) const; + }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 60b1233..e5ecf2c 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -29,9 +29,21 @@ using namespace llvm; +unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { + static const unsigned EhDataReg[] = { + Mips::A0, Mips::A1, Mips::A2, Mips::A3 + }; + static const unsigned EhDataReg64[] = { + Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64 + }; + + return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I]; +} + void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); const MipsRegisterInfo *RegInfo = static_cast(MF.getTarget().getRegisterInfo()); const MipsSEInstrInfo &TII = @@ -105,6 +117,30 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { } } + if (MipsFI->callsEhReturn()) { + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + + // Insert instructions that spill eh data registers. + for (int I = 0; I < 4; ++I) { + if (!MBB.isLiveIn(ehDataReg(I))) + MBB.addLiveIn(ehDataReg(I)); + TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false, + MipsFI->getEhDataRegFI(I), RC, RegInfo); + } + + // Emit .cfi_offset directives for eh data registers. + MCSymbol *CSLabel2 = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2); + for (int I = 0; I < 4; ++I) { + int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I)); + DstML = MachineLocation(MachineLocation::VirtualFP, Offset); + SrcML = MachineLocation(ehDataReg(I)); + Moves.push_back(MachineMove(CSLabel2, DstML, SrcML)); + } + } + // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. 
@@ -124,6 +160,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + const MipsRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); const MipsSEInstrInfo &TII = *static_cast(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); @@ -144,6 +183,22 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); } + if (MipsFI->callsEhReturn()) { + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + + // Find first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instructions that restore eh data registers. + for (int J = 0; J < 4; ++J) { + TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J), + RC, RegInfo); + } + } + // Get the number of bytes from FrameInfo uint64_t StackSize = MFI->getStackSize(); @@ -198,12 +253,17 @@ void MipsSEFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineRegisterInfo &MRI = MF.getRegInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) MRI.setPhysRegUsed(FP); + // Create spill slots for eh data registers if function calls eh_return. + if (MipsFI->callsEhReturn()) + MipsFI->createEhDataRegsFI(); + // Set scavenging frame index if necessary. uint64_t MaxSPOffset = MF.getInfo()->getIncomingArgSize() + estimateStackSize(MF); diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 6481a0a..9b04ee9 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -37,6 +37,7 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; + unsigned ehDataReg(unsigned I) const; }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index cd8f9f4..a9809ef 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -220,6 +220,10 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::ExtractElementF64: ExpandExtractElementF64(MBB, MI); break; + case Mips::MIPSeh_return32: + case Mips::MIPSeh_return64: + ExpandEhReturn(MBB, MI); + break; } MBB.erase(MI); @@ -356,6 +360,31 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, .addReg(HiReg); } +void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // This pseudo instruction is generated as part of the lowering of + // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and + // indirect jump to TargetReg + const MipsSubtarget &STI = TM.getSubtarget(); + unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned OR = STI.isABI_N64() ? Mips::OR64 : Mips::OR; + unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR; + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; + unsigned ZERO = STI.isABI_N64() ? 
Mips::ZERO_64 : Mips::ZERO; + unsigned OffsetReg = I->getOperand(0).getReg(); + unsigned TargetReg = I->getOperand(1).getReg(); + + // or $ra, $v0, $zero + // addu $sp, $sp, $v1 + // jr $ra + BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA) + .addReg(TargetReg).addReg(ZERO); + BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP) + .addReg(SP).addReg(OffsetReg); + BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA); +} + const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { return new MipsSEInstrInfo(TM); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 55b78b2..3e22b33 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -85,6 +85,8 @@ private: MachineBasicBlock::iterator I) const; void ExpandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + void ExpandEhReturn(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; }; } diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index abeab7b..0c0a1a3 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -83,6 +83,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -93,15 +94,18 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } + bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex); + // The following stack frame objects are always referenced relative to $sp: // 1. Outgoing arguments. // 2. Pointer to dynamically allocated stack space. // 3. Locations for callee-saved registers. + // 4. Locations for eh data registers. // Everything else is referenced relative to whatever register // getFrameRegister() returns. unsigned FrameReg; - if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); -- cgit v1.1 From 9a7bf438b50fed2c77f0e2bc835defa5b4728f82 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 30 Jan 2013 02:09:52 +0000 Subject: This patch reworks how llvm targets set and update ELF header e_flags. Currently gathering information such as symbol, section and data is done by collecting it in an MCAssembler object. From MCAssembler and MCAsmLayout objects ELFObjectWriter::WriteObject() forms and streams out the ELF object file. This patch just adds a few members to the MCAssembler class to store and access the e_flag settings. It allows for runtime additions to the e_flag by assembler directives. The standalone assembler can get to MCAssembler from getParser().getStreamer().getAssembler(). This patch is the generic infrastructure and will be followed by patches for ARM and Mips for their target specific use.
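For illustration, a directive handler in a target assembly parser could update the flags along these lines (sketch only; the Mips flag is just an example value):

// Reach the MCAssembler through the streamer and OR in a target flag.
MCELFStreamer &S = static_cast<MCELFStreamer &>(getParser().getStreamer());
MCAssembler &MCA = S.getAssembler();
MCA.setELFHeaderEFlags(MCA.getELFHeaderEFlags() | ELF::EF_MIPS_NOREORDER);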
Contributor: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 14 ++++++-------- lib/MC/MCAssembler.cpp | 3 ++- lib/MC/MCELFObjectTargetWriter.cpp | 5 ----- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 6 ------ lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 14 -------------- 5 files changed, 8 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 3b12d3a..8805773 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -142,9 +142,6 @@ class ELFObjectWriter : public MCObjectWriter { bool hasRelocationAddend() const { return TargetObjectWriter->hasRelocationAddend(); } - unsigned getEFlags() const { - return TargetObjectWriter->getEFlags(); - } unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const { @@ -152,7 +149,6 @@ class ELFObjectWriter : public MCObjectWriter { IsRelocWithSymbol, Addend); } - public: ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &_OS, bool IsLittleEndian) @@ -233,7 +229,8 @@ class ELFObjectWriter : public MCObjectWriter { F.getContents().append(&buf[0], &buf[8]); } - void WriteHeader(uint64_t SectionDataSize, + void WriteHeader(const MCAssembler &Asm, + uint64_t SectionDataSize, unsigned NumberOfSections); void WriteSymbolEntry(MCDataFragment *SymtabF, @@ -373,7 +370,8 @@ ELFObjectWriter::~ELFObjectWriter() {} // Emit the ELF header. -void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize, +void ELFObjectWriter::WriteHeader(const MCAssembler &Asm, + uint64_t SectionDataSize, unsigned NumberOfSections) { // ELF Header // ---------- @@ -411,7 +409,7 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize, sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes // e_flags = whatever the target wants - Write32(getEFlags()); + Write32(Asm.getELFHeaderEFlags()); // e_ehsize = ELF header size Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr)); @@ -1534,7 +1532,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, } // Write out the ELF header ... - WriteHeader(SectionHeaderOffset, NumSections + 1); + WriteHeader(Asm, SectionHeaderOffset, NumSections + 1); // ... then the regular sections ...
// + because of .shstrtab diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index c51ddc8..983de1a 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -264,7 +264,7 @@ MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_, raw_ostream &OS_) : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_), OS(OS_), BundleAlignSize(0), RelaxAll(false), NoExecStack(false), - SubsectionsViaSymbols(false) { + SubsectionsViaSymbols(false), ELFHeaderEFlags(0) { } MCAssembler::~MCAssembler() { @@ -281,6 +281,7 @@ void MCAssembler::reset() { RelaxAll = false; NoExecStack = false; SubsectionsViaSymbols = false; + ELFHeaderEFlags = 0; // reset objects owned by us getBackend().reset(); diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 74cd042..4cac84d 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -24,11 +24,6 @@ MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, IsN64(IsN64_){ } -/// Default e_flags = 0 -unsigned MCELFObjectTargetWriter::getEFlags() const { - return 0; -} - const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 9193e40..f98bbd2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -37,7 +37,6 @@ namespace { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; - virtual unsigned getEFlags() const; virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, @@ -53,11 +52,6 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -// FIXME: get the real EABI Version from the Triple. -unsigned ARMELFObjectWriter::getEFlags() const { - return ELF::EF_ARM_EABIMASK & DefaultEABIVersion; -} - // In ARM, _MergedGlobals and other most symbols get emitted directly. // I.e. not as an offset to a section symbol. // This code is an approximation of what ARM/gcc does. diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 7afb77e..6471b51 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -42,7 +42,6 @@ namespace { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; - virtual unsigned getEFlags() const; virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, @@ -61,19 +60,6 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, MipsELFObjectWriter::~MipsELFObjectWriter() {} -// FIXME: get the real EABI Version from the Subtarget class. 
-unsigned MipsELFObjectWriter::getEFlags() const { - - // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static) - unsigned Flag = ELF::EF_MIPS_NOREORDER; - - if (is64Bit()) - Flag |= ELF::EF_MIPS_ARCH_64R2; - else - Flag |= ELF::EF_MIPS_ARCH_32R2; - return Flag; -} - const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, -- cgit v1.1 From dba14301f0098f9fc5c0d244bf334f55a6a21960 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 30 Jan 2013 02:16:36 +0000 Subject: This patch implements runtime Mips specific setting of ELF header e_flags. Contributer: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173884 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 61 +++++++++++++++++++++++ lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 37 ++++++++++++++ lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 3 +- lib/Target/Mips/MipsAsmPrinter.cpp | 5 ++ lib/Target/Mips/MipsSubtarget.cpp | 4 +- lib/Target/Mips/MipsSubtarget.h | 6 +++ 6 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp new file mode 100644 index 0000000..89891ff --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -0,0 +1,61 @@ +//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// +#include "MCTargetDesc/MipsELFStreamer.h" +#include "MipsSubtarget.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + + MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter, + RelaxAll, NoExecStack); + return S; + } + + // For llc. 
Set a group of ELF header flags + void + MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) { + + if (hasRawTextSupport()) + return; + + // Update e_header flags + MCAssembler& MCA = getAssembler(); + unsigned EFlags = MCA.getELFHeaderEFlags(); + + EFlags |= ELF::EF_MIPS_NOREORDER; + + // Architecture + if (Subtarget.hasMips64r2()) + EFlags |= ELF::EF_MIPS_ARCH_64R2; + else if (Subtarget.hasMips64()) + EFlags |= ELF::EF_MIPS_ARCH_64; + else if (Subtarget.hasMips32r2()) + EFlags |= ELF::EF_MIPS_ARCH_32R2; + else + EFlags |= ELF::EF_MIPS_ARCH_32; + + // Relocation Model + Reloc::Model RM = Subtarget.getRelocationModel(); + if (RM == Reloc::PIC_ || RM == Reloc::Default) + EFlags |= ELF::EF_MIPS_PIC; + else if (RM == Reloc::Static) + ; // Do nothing for Reloc::Static + else + llvm_unreachable("Unsupported relocation model for e_flags"); + + MCA.setELFHeaderEFlags(EFlags); + + + } +} diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h new file mode 100644 index 0000000..7739bd9 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -0,0 +1,37 @@ +//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENCE.TXT for details. +// +//===-------------------------------------------------------------------===// +#ifndef MIPSELFSTREAMER_H_ +#define MIPSELFSTREAMER_H_ + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { +class MipsSubtarget; + +class MipsELFStreamer : public MCELFStreamer { +private: + unsigned EFlags; +public: + MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) + : MCELFStreamer(Context, TAB, OS, Emitter), EFlags(0) { + } + + ~MipsELFStreamer() {} + void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget); +// void emitELFHeaderFlagCG(unsigned Val); +}; + + MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); +} + +#endif /* MIPSELFSTREAMER_H_ */ diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 9360971..be83b54 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/MipsELFStreamer.h" #include "MipsMCTargetDesc.h" #include "InstPrinter/MipsInstPrinter.h" #include "MipsMCAsmInfo.h" @@ -131,7 +132,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, bool NoExecStack) { Triple TheTriple(TT); - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); + return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeMipsTargetMC() { diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index e3c3429..df6baaf 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "mips-asm-printer" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsELFStreamer.h" #include "Mips.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" @@ -545,9 +546,13 @@ void 
MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (OutStreamer.hasRawTextSupport()) return; + // Emit Mips ELF register info Subtarget->getMReginfo().emitMipsReginfoSectionCG( OutStreamer, getObjFileLowering(), *Subtarget); + MipsELFStreamer & MES = static_cast(OutStreamer); + MES.emitELFHeaderFlagsCG(*Subtarget); } MachineLocation diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 30d377a..6ad97db 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -26,13 +26,13 @@ void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little, - Reloc::Model RM) : + Reloc::Model _RM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), HasBitCount(false), HasFPIdx(false), - InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false) + InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false), RM(_RM) { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 001d8d1..63cde8d 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -100,6 +100,9 @@ protected: // The instance to the register info section object MipsReginfo MRI; + // Relocation Model + Reloc::Model RM; + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -152,6 +155,9 @@ public: // Grab MipsRegInfo object const MipsReginfo &getMReginfo() const { return MRI; } + + // Grab relocation model + Reloc::Model getRelocationModel() const {return RM;} }; } // End llvm namespace -- cgit v1.1 From 97130e2b3de080e231caac86dbce1500e4e7af16 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 30 Jan 2013 02:24:33 +0000 Subject: This patch implements runtime ARM specific setting of ELF header e_flags. Contributer: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173885 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 11 +++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 10 ++++++++++ 2 files changed, 21 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ee2a228..397736a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -37,6 +37,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCObjectStreamer.h" @@ -45,6 +46,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -699,6 +701,15 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. 
+ if (Subtarget->isTargetELF()) { + if (OutStreamer.hasRawTextSupport()) return; + + MCELFStreamer &MES = static_cast(OutStreamer); + MES.getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e9bdc4a..77620e9 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -18,7 +18,9 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -28,6 +30,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" @@ -250,6 +253,13 @@ public: // Not in an ITBlock to start with. ITState.CurPosition = ~0U; + + // Set ELF header flags. + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. + MCELFStreamer &MES = static_cast(Parser.getStreamer()); + MES.getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } // Implementation of the MCTargetAsmParser interface: -- cgit v1.1 From cbaf2ac072369f67536485bcc00e7de1f1202679 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 30 Jan 2013 02:32:36 +0000 Subject: Forgot to add new file to CMakeLists git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173886 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index f5b0cca..4212c94 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMMipsDesc MipsMCTargetDesc.cpp MipsELFObjectWriter.cpp MipsReginfo.cpp + MipsELFStreamer.cpp ) add_dependencies(LLVMMipsDesc MipsCommonTableGen) -- cgit v1.1 From 84fb9633caf6dc5b63ca98fd4d92e6086ab146d0 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 30 Jan 2013 02:56:02 +0000 Subject: Remove unused variable (introduced in r173884) to clear clang -Werror build git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173887 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 7739bd9..305fba3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -15,8 +15,6 @@ namespace llvm { class MipsSubtarget; class MipsELFStreamer : public MCELFStreamer { -private: - unsigned EFlags; public: MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, -- cgit v1.1 From d7800dfba255305941d81a6b8b30cb37ef50f5ef Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 30 Jan 2013 03:04:07 +0000 Subject: Removing initializer for the field removed in r173887 git-svn-id: 
https://llvm.org/svn/llvm-project/llvm/trunk@173888 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 305fba3..0d067e0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -19,7 +19,7 @@ public: MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) - : MCELFStreamer(Context, TAB, OS, Emitter), EFlags(0) { + : MCELFStreamer(Context, TAB, OS, Emitter) { } ~MipsELFStreamer() {} -- cgit v1.1 From cc687faba373e6aa3cefe594bd9f3212e18617eb Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 30 Jan 2013 06:35:22 +0000 Subject: InstCombine: canonicalize sext-and --> select sext-not-and --> select. Patch by Muhammad Tauqir Ahmad. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173901 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index c1e60d4..bf065fe 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1245,6 +1245,34 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } + { + Value *X = 0; + bool OpsSwapped = false; + // Canonicalize SExt or Not to the LHS + if (match(Op1, m_SExt(m_Value())) || + match(Op1, m_Not(m_Value()))) { + std::swap(Op0, Op1); + OpsSwapped = true; + } + + // Fold (and (sext bool to A), B) --> (select bool, B, 0) + if (match(Op0, m_SExt(m_Value(X))) && + X->getType()->getScalarType()->isIntegerTy(1)) { + Value *Zero = Constant::getNullValue(Op1->getType()); + return SelectInst::Create(X, Op1, Zero); + } + + // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) + if (match(Op0, m_Not(m_SExt(m_Value(X)))) && + X->getType()->getScalarType()->isIntegerTy(1)) { + Value *Zero = Constant::getNullValue(Op0->getType()); + return SelectInst::Create(X, Zero, Op1); + } + + if (OpsSwapped) + std::swap(Op0, Op1); + } + return Changed ? &I : 0; } -- cgit v1.1 From 254aed5e22a36effb1e1b9e8d03f8cd5e9bb7586 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 30 Jan 2013 06:54:41 +0000 Subject: Attempt to fix dragonegg. Use the number of slots to determine if the AttributeSet has attributes or not. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173902 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 39f95fa..2d69493 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -797,7 +797,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, } static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) { - if (Attrs.isEmpty()) + if (Attrs.getNumSlots() == 0) return true; unsigned LastSlot = Attrs.getNumSlots() - 1; -- cgit v1.1 From 328027bf269bb0c108bd8533908ccb36ba11e9f0 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Wed, 30 Jan 2013 12:05:05 +0000 Subject: Move UTF conversion routines from clang/lib/Basic to llvm/lib/Support This is required to use them in TableGen. 
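As a rough usage sketch of the relocated API (illustrative only: the toUTF16 helper and its buffer sizing are not part of this commit; the entry points and types are):

#include "llvm/Support/ConvertUTF.h"
#include <vector>

// Convert a UTF-8 buffer to UTF-16. One UTF-8 byte never expands to more
// than one UTF-16 code unit, so the input length is a safe output capacity.
static std::vector<UTF16> toUTF16(const UTF8 *Begin, const UTF8 *End) {
  std::vector<UTF16> Out(End - Begin);
  const UTF8 *Src = Begin;
  UTF16 *Dst = Out.data();
  ConversionResult CR = ConvertUTF8toUTF16(&Src, End, &Dst,
                                           Out.data() + Out.size(),
                                           strictConversion);
  // Trim to what was actually written; drop everything on a failed conversion.
  Out.resize(CR == conversionOK ? Dst - Out.data() : 0);
  return Out;
}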
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173923 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/CMakeLists.txt | 2 + lib/Support/ConvertUTF.c | 571 ++++++++++++++++++++++++++++++++++++++ lib/Support/ConvertUTFWrapper.cpp | 76 +++++ 3 files changed, 649 insertions(+) create mode 100644 lib/Support/ConvertUTF.c create mode 100644 lib/Support/ConvertUTFWrapper.cpp (limited to 'lib') diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index f294a17..5ba69fc 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_library(LLVMSupport circular_raw_ostream.cpp CommandLine.cpp ConstantRange.cpp + ConvertUTF.c + ConvertUTFWrapper.cpp CrashRecoveryContext.cpp DataExtractor.cpp DataStream.cpp diff --git a/lib/Support/ConvertUTF.c b/lib/Support/ConvertUTF.c new file mode 100644 index 0000000..23f17ca --- /dev/null +++ b/lib/Support/ConvertUTF.c @@ -0,0 +1,571 @@ +/*===--- ConvertUTF.c - Universal Character Names conversions ---------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=*/ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + + See the header file "ConvertUTF.h" for complete documentation. 
+ +------------------------------------------------------------------------ */ + + +#include "llvm/Support/ConvertUTF.h" +#ifdef CVTUTF_DEBUG +#include +#endif + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define false 0 +#define true 1 + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. + */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. + * Constants have been gathered. Loops & conditionals have been removed as + * much as possible for efficiency, in favor of drop-through switches. + * (See "Note A" at the bottom of the file for equivalent code.) + * If your compiler supports it, the "isLegalUTF8" call can be turned + * into an inline function. 
+ */ + + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG +if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); +} +#endif + return result; +} +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. 
*/ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ + +static Boolean isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... 
*/ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (length > sourceEnd - source) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return the total number of bytes in a codepoint + * represented in UTF-8, given the value of the first byte. + */ +unsigned getNumBytesForUTF8(UTF8 first) { + return trailingBytesForUTF8[first] + 1; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 string is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) { + while (*source != sourceEnd) { + int length = trailingBytesForUTF8[**source] + 1; + if (length > sourceEnd - *source || !isLegalUTF8(*source, length)) + return false; + *source += length; + } + return true; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (extraBytesToRead >= sourceEnd - source) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (!isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (extraBytesToRead >= sourceEnd - source) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (!isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up the source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. + */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- + + Note A. + The fall-through switches in UTF-8 reading code save a + temp variable, some decrements & conditionals. The switches + are equivalent to the following loop: + { + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); + } + In UTF-8 writing code, the switches on "bytesToWrite" are + similarly unrolled loops. 
+ + --------------------------------------------------------------------- */ diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp new file mode 100644 index 0000000..458fbb0 --- /dev/null +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -0,0 +1,76 @@ +//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----=== +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ConvertUTF.h" + +namespace llvm { + +bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, + char *&ResultPtr, const UTF8 *&ErrorPtr) { + assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); + ConversionResult result = conversionOK; + // Copy the character span over. + if (WideCharWidth == 1) { + const UTF8 *Pos = reinterpret_cast(Source.begin()); + if (!isLegalUTF8String(&Pos, reinterpret_cast(Source.end()))) { + result = sourceIllegal; + ErrorPtr = Pos; + } else { + memcpy(ResultPtr, Source.data(), Source.size()); + ResultPtr += Source.size(); + } + } else if (WideCharWidth == 2) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *targetStart = reinterpret_cast(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF16( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 2*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast(targetStart); + else + ErrorPtr = sourceStart; + } else if (WideCharWidth == 4) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *targetStart = reinterpret_cast(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF32( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 4*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast(targetStart); + else + ErrorPtr = sourceStart; + } + assert((result != targetExhausted) + && "ConvertUTF8toUTFXX exhausted target buffer"); + return result == conversionOK; +} + +bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { + const UTF32 *SourceStart = &Source; + const UTF32 *SourceEnd = SourceStart + 1; + UTF8 *TargetStart = reinterpret_cast(ResultPtr); + UTF8 *TargetEnd = TargetStart + 4; + ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd, + &TargetStart, TargetEnd, + strictConversion); + if (CR != conversionOK) + return false; + + ResultPtr = reinterpret_cast(TargetStart); + return true; +} + +} // end namespace llvm + -- cgit v1.1 From 52b1b3bbc6c8a7c7e5669e3169984a48b3f1a4b3 Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Wed, 30 Jan 2013 15:39:04 +0000 Subject: Override virtual function for ARM EH directives. 
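These streamer overrides back the ARM EHABI assembly directives (.fnstart, .fnend, .cantunwind, .personality, .handlerdata, .setfp, .pad, .save/.vsave). A minimal sketch of the intended call order, using only the hooks added below (the emitFrame wrapper and its arguments are placeholders, not part of this patch):

void emitFrame(ARMELFStreamer &S, const MCSymbol *Pers) {
  S.EmitFnStart();          // .fnstart: open the frame, define the start label
  S.EmitPersonality(Pers);  // .personality: must precede .handlerdata
  S.EmitHandlerData();      // .handlerdata: switch to .ARM.extab, emit opcodes
  S.EmitFnEnd();            // .fnend: emit the .ARM.exidx entry, reset state
}

Note that EmitSetFP, EmitPad and EmitRegSave are still TODO stubs in this commit, so the emitted unwind tables carry only FINISH opcodes for now.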
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173939 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 218 ++++++++++++++++++++++++- 1 file changed, 216 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 39ded8f..526f571 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMUnwindOp.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" @@ -56,11 +57,25 @@ public: ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(Context, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), + ExTab(0), FnStart(0), Personality(0), CantUnwind(false) { } ~ARMELFStreamer() {} + // ARM exception handling directives + virtual void EmitFnStart(); + virtual void EmitFnEnd(); + virtual void EmitCantUnwind(); + virtual void EmitPersonality(const MCSymbol *Per); + virtual void EmitHandlerData(); + virtual void EmitSetFP(unsigned NewFpReg, + unsigned NewSpReg, + int64_t Offset = 0); + virtual void EmitPad(int64_t Offset); + virtual void EmitRegSave(const SmallVectorImpl &RegList, + bool isVector); + virtual void ChangeSection(const MCSection *Section) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the @@ -172,6 +187,15 @@ private: SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); } + // Helper functions for ARM exception handling directives + void Reset(); + + void EmitPersonalityFixup(StringRef Name); + + void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, + SectionKind Kind, const MCSymbol &Fn); + void SwitchToExTabSection(const MCSymbol &FnStart); + void SwitchToExIdxSection(const MCSymbol &FnStart); bool IsThumb; int64_t MappingSymbolCounter; @@ -179,10 +203,200 @@ private: DenseMap LastMappingSymbols; ElfMappingSymbol LastEMS; - /// @} + // ARM Exception Handling Frame Information + MCSymbol *ExTab; + MCSymbol *FnStart; + const MCSymbol *Personality; + bool CantUnwind; }; } +inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, + unsigned Type, + unsigned Flags, + SectionKind Kind, + const MCSymbol &Fn) { + const MCSectionELF &FnSection = + static_cast(Fn.getSection()); + + // Create the name for new section + StringRef FnSecName(FnSection.getSectionName()); + SmallString<128> EHSecName(Prefix); + if (FnSecName != ".text") { + EHSecName += FnSecName; + } + + // Get .ARM.extab or .ARM.exidx section + const MCSectionELF *EHSection = NULL; + if (const MCSymbol *Group = FnSection.getGroup()) { + EHSection = getContext().getELFSection( + EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, + FnSection.getEntrySize(), Group->getName()); + } else { + EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); + } + assert(EHSection); + + // Switch to .ARM.extab or .ARM.exidx section + SwitchSection(EHSection); + EmitCodeAlignment(4, 0); +} + +inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) { + SwitchToEHSection(".ARM.extab", + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getDataRel(), + FnStart); +} + +inline void 
ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { + SwitchToEHSection(".ARM.exidx", + ELF::SHT_ARM_EXIDX, + ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, + SectionKind::getDataRel(), + FnStart); +} + +void ARMELFStreamer::Reset() { + ExTab = NULL; + FnStart = NULL; + Personality = NULL; + CantUnwind = false; +} + +// Add the R_ARM_NONE fixup at the same position +void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { + const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name); + + const MCSymbolRefExpr *PersonalityRef = + MCSymbolRefExpr::Create(PersonalitySym, + MCSymbolRefExpr::VK_ARM_NONE, + getContext()); + + AddValueSymbols(PersonalityRef); + MCDataFragment *DF = getOrCreateDataFragment(); + DF->getFixups().push_back( + MCFixup::Create(DF->getContents().size(), PersonalityRef, + MCFixup::getKindForSize(4, false))); +} + +void ARMELFStreamer::EmitFnStart() { + assert(FnStart == 0); + FnStart = getContext().CreateTempSymbol(); + EmitLabel(FnStart); +} + +void ARMELFStreamer::EmitFnEnd() { + assert(FnStart && ".fnstart must preceeds .fnend"); + + // Emit unwind opcodes if there is no .handlerdata directive + int PersonalityIndex = -1; + if (!ExTab && !CantUnwind) { + // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab. + SwitchToExTabSection(*FnStart); + + // Create .ARM.extab label for offset in .ARM.exidx + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + + PersonalityIndex = 1; + + uint32_t Entry = 0; + uint32_t NumExtraEntryWords = 0; + Entry |= NumExtraEntryWords << 24; + Entry |= (EHT_COMPACT | PersonalityIndex) << 16; + + // TODO: This should be generated according to .save, .vsave, .setfp + // directives. Currently, we are simply generating FINISH opcode. + Entry |= UNWIND_OPCODE_FINISH << 8; + Entry |= UNWIND_OPCODE_FINISH; + + EmitIntValue(Entry, 4, 0); + } + + // Emit the exception index table entry + SwitchToExIdxSection(*FnStart); + + if (PersonalityIndex == 1) + EmitPersonalityFixup("__aeabi_unwind_cpp_pr1"); + + const MCSymbolRefExpr *FnStartRef = + MCSymbolRefExpr::Create(FnStart, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); + + EmitValue(FnStartRef, 4, 0); + + if (CantUnwind) { + EmitIntValue(EXIDX_CANTUNWIND, 4, 0); + } else { + const MCSymbolRefExpr *ExTabEntryRef = + MCSymbolRefExpr::Create(ExTab, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); + EmitValue(ExTabEntryRef, 4, 0); + } + + // Clean exception handling frame information + Reset(); +} + +void ARMELFStreamer::EmitCantUnwind() { + CantUnwind = true; +} + +void ARMELFStreamer::EmitHandlerData() { + SwitchToExTabSection(*FnStart); + + // Create .ARM.extab label for offset in .ARM.exidx + assert(!ExTab); + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + + // Emit Personality + assert(Personality && ".personality directive must preceed .handlerdata"); + + const MCSymbolRefExpr *PersonalityRef = + MCSymbolRefExpr::Create(Personality, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); + + EmitValue(PersonalityRef, 4, 0); + + // Emit unwind opcodes + uint32_t Entry = 0; + uint32_t NumExtraEntryWords = 0; + + // TODO: This should be generated according to .save, .vsave, .setfp + // directives. Currently, we are simply generating FINISH opcode. 
+ Entry |= NumExtraEntryWords << 24; + Entry |= UNWIND_OPCODE_FINISH << 16; + Entry |= UNWIND_OPCODE_FINISH << 8; + Entry |= UNWIND_OPCODE_FINISH; + + EmitIntValue(Entry, 4, 0); +} + +void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { + Personality = Per; +} + +void ARMELFStreamer::EmitSetFP(unsigned NewFpReg, + unsigned NewSpReg, + int64_t Offset) { + // TODO: Not implemented +} + +void ARMELFStreamer::EmitPad(int64_t Offset) { + // TODO: Not implemented +} + +void ARMELFStreamer::EmitRegSave(const SmallVectorImpl &RegList, + bool IsVector) { + // TODO: Not implemented +} + namespace llvm { MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, -- cgit v1.1 From 620d5bd8e43331a9b5ba2437c1de0d3f4a43a34d Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Wed, 30 Jan 2013 15:48:50 +0000 Subject: Add missing header and test cases for r173939. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173941 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h | 112 ++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h (limited to 'lib') diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h new file mode 100644 index 0000000..dad5576 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h @@ -0,0 +1,112 @@ +//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the constants for the ARM unwind opcodes and exception +// handling table entry kinds. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARM_UNWIND_OP_H +#define ARM_UNWIND_OP_H + +namespace llvm { + + /// ARM exception handling table entry kinds + enum ARMEHTEntryKind { + EHT_GENERIC = 0x00, + EHT_COMPACT = 0x80 + }; + + enum { + /// Special entry for the function never unwind + EXIDX_CANTUNWIND = 0x1 + }; + + /// ARM-defined frame unwinding opcodes + enum ARMUnwindOpcodes { + // Format: 00xxxxxx + // Purpose: vsp = vsp + ((x << 2) + 4) + UNWIND_OPCODE_INC_VSP = 0x00, + + // Format: 01xxxxxx + // Purpose: vsp = vsp - ((x << 2) + 4) + UNWIND_OPCODE_DEC_VSP = 0x40, + + // Format: 10000000 00000000 + // Purpose: refuse to unwind + UNWIND_OPCODE_REFUSE = 0x8000, + + // Format: 1000xxxx xxxxxxxx + // Purpose: pop r[15:12], r[11:4] + // Constraint: x != 0 + UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000, + + // Format: 1001xxxx + // Purpose: vsp = r[x] + // Constraint: x != 13 && x != 15 + UNWIND_OPCODE_SET_VSP = 0x90, + + // Format: 10100xxx + // Purpose: pop r[(4+x):4] + UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0, + + // Format: 10101xxx + // Purpose: pop r14, r[(4+x):4] + UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8, + + // Format: 10110000 + // Purpose: finish + UNWIND_OPCODE_FINISH = 0xb0, + + // Format: 10110001 0000xxxx + // Purpose: pop r[3:0] + // Constraint: x != 0 + UNWIND_OPCODE_POP_REG_MASK = 0xb100, + + // Format: 10110010 x(uleb128) + // Purpose: vsp = vsp + ((x << 2) + 0x204) + UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2, + + // Format: 10110011 xxxxyyyy + // Purpose: pop d[(x+y):x] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300, + + // Format: 10111xxx + // Purpose: pop d[(8+x):8] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8, + + // Format: 11000xxx + // Purpose: pop wR[(10+x):10] + UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0, + + // Format: 11000110 xxxxyyyy + // Purpose: pop wR[(x+y):x] + UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600, + + // Format: 11000111 0000xxxx + // Purpose: pop wCGR[3:0] + // Constraint: x != 0 + UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700, + + // Format: 11001000 xxxxyyyy + // Purpose: pop d[(16+x+y):(16+x)] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800, + + // Format: 11001001 xxxxyyyy + // Purpose: pop d[(x+y):x] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900, + + // Format: 11010xxx + // Purpose: pop d[(8+x):8] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0 + }; + +} + +#endif // ARM_UNWIND_OP_H -- cgit v1.1 From 0f156af8312a0f3ce88e5c006bf2a52691039ceb Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 30 Jan 2013 16:30:19 +0000 Subject: Add a special ARM trap encoding for NaCl. 
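In short, with the new nacl-trap subtarget feature the llvm.trap intrinsic lowers to the single word 0xe7fedef0 (permanently undefined as ARM, and decoding as UDF #254 followed by a branch-to-self as Thumb) instead of the default ARM trap encoding 0xe7ffdefe.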
More details in this thread: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130128/163783.html Patch by JF Bastien git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173943 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 5 +++++ lib/Target/ARM/ARMAsmPrinter.cpp | 7 +++++++ lib/Target/ARM/ARMFastISel.cpp | 3 ++- lib/Target/ARM/ARMISelLowering.cpp | 12 +++++++++-- lib/Target/ARM/ARMInstrInfo.td | 28 +++++++++++++++++++++++-- lib/Target/ARM/ARMSubtarget.cpp | 1 + lib/Target/ARM/ARMSubtarget.h | 4 ++++ lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 12 ++++++++++- 8 files changed, 66 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index a76715a..46915ee 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -110,6 +110,11 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", "Is microcontroller profile ('M' series)">; +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 397736a..577cdb0 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1693,6 +1693,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } break; } + case ARM::TRAPNaCl: { + //.long 0xe7fedef0 @ trap + uint32_t Val = 0xe7fedef0UL; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 4); + return; + } case ARM::tTRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 94c574a..a2d0cde 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2562,7 +2562,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get( + Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP)); return true; } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6beb1ab..82b475a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6303,7 +6303,16 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DispatchBB->setIsLandingPad(); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); - BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? 
ARM::tTRAP : ARM::TRAP)); + unsigned trap_opcode; + if (Subtarget->isThumb()) { + trap_opcode = ARM::tTRAP; + } else { + if (Subtarget->useNaClTrap()) + trap_opcode = ARM::TRAPNaCl; + else + trap_opcode = ARM::TRAP; + } + BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); @@ -10317,4 +10326,3 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } - diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 12712c0..e31c479 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -239,6 +239,9 @@ def IsARM : Predicate<"!Subtarget->isThumb()">, def IsIOS : Predicate<"Subtarget->isTargetIOS()">; def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, + AssemblerPredicate<"FeatureNaClTrap", "NaCl">; +def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -1762,11 +1765,32 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", let Inst{3-0} = opt; } -// A5.4 Permanently UNDEFINED instructions. +/* + * A5.4 Permanently UNDEFINED instructions. + * + * For most targets use UDF #65006, for which the OS will generate SIGTRAP. + * Other UDF encodings generate SIGILL. + * + * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb. + * Encoding A1: + * 1110 0111 1111 iiii iiii iiii 1111 iiii + * Encoding T1: + * 1101 1110 iiii iiii + * It uses the following encoding: + * 1110 0111 1111 1110 1101 1110 1111 0000 + * - In ARM: UDF #60896; + * - In Thumb: UDF #254 followed by a branch-to-self. + */ +let isBarrier = 1, isTerminator = 1 in +def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary, + "trap", [(trap)]>, + Requires<[IsARM,UseNaClTrap]> { + let Inst = 0xe7fedef0; +} let isBarrier = 1, isTerminator = 1 in def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, - Requires<[IsARM]> { + Requires<[IsARM,DontUseNaClTrap]> { let Inst = 0xe7ffdefe; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index f84e7a2..c3dea00 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -80,6 +80,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , FPOnlySP(false) , AllowsUnalignedMem(false) , Thumb2DSP(false) + , UseNaClTrap(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 64878cd..33efabf 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -156,6 +156,9 @@ protected: /// and such) instructions in Thumb2 code. bool Thumb2DSP; + /// NaCl TRAP instruction is generated instead of the regular TRAP. + bool UseNaClTrap; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
unsigned stackAlignment; @@ -241,6 +244,7 @@ protected: bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } + bool useNaClTrap() const { return UseNaClTrap; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f4958f3..f09fb5a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMELFStreamer.h" #include "ARMMCAsmInfo.h" +#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -37,6 +38,8 @@ using namespace llvm; std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { + Triple triple(TT); + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. unsigned Len = TT.size(); @@ -119,6 +122,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { ARMArchFeature += ",+thumb-mode"; } + if (triple.isOSNaCl()) { + if (ARMArchFeature.empty()) + ARMArchFeature = "+nacl-trap"; + else + ARMArchFeature += ",+nacl-trap"; + } + return ARMArchFeature; } -- cgit v1.1 From 1229c0cb10e1c3640253ece03670621d96762e75 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Wed, 30 Jan 2013 17:42:15 +0000 Subject: Linker: correctly link in dbg.declare Given source IR: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15 we used to generate call void @llvm.dbg.declare(metadata !27, metadata !28), !dbg !29 !27 = metadata !{null} With this patch, we will correctly generate call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !27), !dbg !28 Looking up %argc.addr in ValueMap will return null, since %argc.addr is already correctly set up, we can use identity mapping. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173946 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/ValueMapper.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index a5e1643..b9e4ddf 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -63,14 +63,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; + if (OP == 0) continue; + Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper); + // If Mapped_OP is null, we should use the identity map. + if (Mapped_OP == OP || Mapped_OP == 0) continue; // Ok, at least one operand needs remapping. SmallVector Elts; Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); + if (Op == 0) + Elts.push_back(0); + else { + Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper); + // If Mapped_Op is null, we should use the identity map. + Elts.push_back(Mapped_Op ?
Mapped_Op : Op); + } MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); -- cgit v1.1 From f578c89dc6ca3e79667c2aa9d0ac4fe409da7773 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 30 Jan 2013 19:24:23 +0000 Subject: Clean up whitespace and indentation a bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173960 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DataLayout.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index b159af6..f09de3a 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -126,7 +126,7 @@ DataLayout::InvalidAlignmentElem = LayoutAlignElem::get(INVALID_ALIGN, 0, 0, 0); PointerAlignElem PointerAlignElem::get(uint32_t addr_space, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { + unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); PointerAlignElem retval; retval.AddressSpace = addr_space; @@ -309,7 +309,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { /// used. DataLayout::DataLayout() : ImmutablePass(ID) { report_fatal_error("Bad DataLayout ctor used. " - "Tool did not specify a DataLayout to use?"); + "Tool did not specify a DataLayout to use?"); } DataLayout::DataLayout(const Module *M) @@ -371,7 +371,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || - Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth)) + Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth)) BestMatchIdx = i; // However, if there isn't one that's larger, then we must use the // largest one we have (see below) @@ -512,7 +512,7 @@ uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { case Type::PointerTyID: { unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace(); return getPointerSizeInBits(AS); - } + } case Type::ArrayTyID: { ArrayType *ATy = cast<ArrayType>(Ty); return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); -- cgit v1.1 From 5bb16fdbb363abee2b9495116ff1a97568460cae Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 30 Jan 2013 21:17:42 +0000 Subject: Add definitions for the PPC a2q core marked as having QPX available This is the first commit of a large series which will add support for the QPX vector instruction set to the PowerPC backend. This instruction set is used on the IBM Blue Gene/Q supercomputers.
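Until the rest of the series lands, the user-visible change is only that the new core can be selected, e.g. with something like llc -mcpu=a2q (the a2q processor definition above enables FeatureQPX by default).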
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173973 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.td | 7 +++++++ lib/Target/PowerPC/PPCSubtarget.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index cb15dad..d3bcbae 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -58,6 +58,8 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; +def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", + "Enable QPX instructions">; //===----------------------------------------------------------------------===// // Register File Description @@ -109,6 +111,11 @@ def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureSTFIWX, FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureMFOCRF, FeatureFSqrt, + FeatureSTFIWX, FeatureISEL, + Feature64Bit /*, Feature64BitRegs */, + FeatureQPX]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 3ddae63..28f8587 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -70,6 +70,7 @@ protected: bool Use64BitRegs; bool IsPPC64; bool HasAltivec; + bool HasQPX; bool HasFSQRT; bool HasSTFIWX; bool HasISEL; @@ -150,6 +151,7 @@ public: bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } bool hasAltivec() const { return HasAltivec; } + bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool isBookE() const { return IsBookE; } -- cgit v1.1 From 383da6ba555a3c2be360d9bdc75a74f315f2354e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 30 Jan 2013 21:22:59 +0000 Subject: Remove redundant code. It was creating a new AttrBuilder when we could just fill in the AttrBuilder we're building. 
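In other words, the AttrBuilder(AS, Idx) constructor now fills in the builder directly. A short sketch of the resulting usage pattern (C and Idx stand for a context and a slot index; this snippet is not code from the commit):

AttrBuilder B(AS, Idx);                 // seeded straight from the set
B.addAttribute(Attribute::NoCapture);   // adjust as needed
AttributeSet NewAS = AttributeSet::get(C, Idx, B);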
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173975 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 1a97110..938a34a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -767,33 +767,15 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; - AttrBuilder B; - for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { if (pImpl->getSlotIndex(I) != Idx) continue; - for (AttributeSetNode::const_iterator II = pImpl->begin(I), + for (AttributeSetImpl::const_iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) - B.addAttributes(*II); + addAttributes(*II); break; } - - if (!B.hasAttributes()) return; - - uint64_t Mask = B.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (uint64_t A = (Mask & AttributeImpl::getAttrMask(I))) { - Attrs.insert(I); - - if (I == Attribute::Alignment) - Alignment = 1ULL << ((A >> 16) - 1); - else if (I == Attribute::StackAlignment) - StackAlignment = 1ULL << ((A >> 26)-1); - } - } } void AttrBuilder::clear() { -- cgit v1.1 From f9cd7738a3966986dd50db56d8a74952b3016cc0 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 30 Jan 2013 22:43:44 +0000 Subject: Initialize hasQPX in PPCSubtarget This should have gone in with r173973. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCSubtarget.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index d9b4e30..91beeae 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -36,6 +36,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , Use64BitRegs(false) , IsPPC64(is64Bit) , HasAltivec(false) + , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) , HasISEL(false) -- cgit v1.1 From a66f40a8cc685b2869e7f8d988f9a17439875ece Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 30 Jan 2013 22:56:35 +0000 Subject: Restrict sin/cos optimization to 64-bit only for now. 32-bit is a bit messy and less critical. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173987 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 21 ++++++++++++--------- lib/Target/X86/X86Subtarget.cpp | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0a53a3e..36d1ad4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1294,7 +1294,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetDarwin() && Subtarget->is64Bit()) { + if (Subtarget->isTargetDarwin()) { // For MacOSX, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret to avoid memory // traffic. 
@@ -12037,7 +12037,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin()); + assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, // which returns the values in two XMM registers. @@ -12054,18 +12054,21 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + + // Only optimize x86_64 for now. i386 is a bit messy. For f32, + // the small struct {f32, f32} is returned in (eax, edx). For f64, + // the results are returned via SRet in memory. const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/true, - Callee, Args, DAG, dl); + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, + false, false, false, false, 0, + CallingConv::C, /*isTaillCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed*/true, + Callee, Args, DAG, dl); std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index dad95c6..6305501 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -157,7 +157,8 @@ const char *X86Subtarget::getBZeroEntry() const { bool X86Subtarget::hasSinCos() const { return getTargetTriple().isMacOSX() && - !getTargetTriple().isMacOSXVersionLT(10, 9); + !getTargetTriple().isMacOSXVersionLT(10, 9) && + is64Bit(); } /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls -- cgit v1.1 From e74365462a39529ae48ef4d34ec76b4543b8ea29 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 30 Jan 2013 23:07:40 +0000 Subject: Convert typeIncompatible to return an AttributeSet. There are still places which treat the Attribute object as a collection of attributes. I'm systematically removing them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173990 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 18 ++++++++---------- lib/IR/Verifier.cpp | 4 ++-- lib/Transforms/IPO/DeadArgumentElimination.cpp | 13 ++++++++++--- lib/Transforms/InstCombine/InstCombineCalls.cpp | 13 ++++++++++--- 4 files changed, 30 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 938a34a..75ba93a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -612,15 +612,13 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, AttrSet.push_back(getSlotAttributes(I)); } - // Now add the attribute into the correct slot. There may already be an + // Now remove the attribute from the correct slot. There may already be an // AttributeSet there. 
AttrBuilder B(AS, Idx); for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) if (Attrs.getSlotIndex(I) == Idx) { - for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I), - IE = Attrs.pImpl->end(I); II != IE; ++II) - B.removeAttributes(*II); + B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx); break; } @@ -813,8 +811,8 @@ AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) { return *this; } -AttrBuilder &AttrBuilder::removeAttributes(Attribute A) { - uint64_t Mask = A.Raw(); +AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { + uint64_t Mask = A.Raw(Index); for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { @@ -862,8 +860,8 @@ bool AttrBuilder::hasAttributes() const { return !Attrs.empty(); } -bool AttrBuilder::hasAttributes(const Attribute &A) const { - return Raw() & A.Raw(); +bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { + return Raw() & A.Raw(Index); } bool AttrBuilder::hasAlignmentAttr() const { @@ -916,7 +914,7 @@ uint64_t AttrBuilder::Raw() const { // AttributeFuncs Function Defintions //===----------------------------------------------------------------------===// -Attribute AttributeFuncs::typeIncompatible(Type *Ty) { +AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { AttrBuilder Incompatible; if (!Ty->isIntegerTy()) @@ -932,7 +930,7 @@ Attribute AttributeFuncs::typeIncompatible(Type *Ty) { .addAttribute(Attribute::NoCapture) .addAttribute(Attribute::StructRet); - return Attribute::get(Ty->getContext(), Incompatible); + return AttributeSet::get(Ty->getContext(), Index, Incompatible); } /// \brief This returns an integer containing an encoding of all the LLVM diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 2d69493..5da7448 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -693,9 +693,9 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, "'noinline and alwaysinline' are incompatible!", V); Assert1(!AttrBuilder(Attrs, Idx). - hasAttributes(AttributeFuncs::typeIncompatible(Ty)), + hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx), "Wrong types for attribute: " + - AttributeFuncs::typeIncompatible(Ty).getAsString(), V); + AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V); if (PointerType *PTy = dyn_cast(Ty)) Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) || diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index e651fb8..49ef1e7 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -764,10 +764,14 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RAttrs = AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex, AttrBuilder(RAttrs, AttributeSet::ReturnIndex). - removeAttributes(AttributeFuncs::typeIncompatible(NRetTy))); + removeAttributes(AttributeFuncs:: + typeIncompatible(NRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex)); else assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). 
- hasAttributes(AttributeFuncs::typeIncompatible(NRetTy)) && + hasAttributes(AttributeFuncs:: + typeIncompatible(NRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex) && "Return attributes no longer compatible?"); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) @@ -841,7 +845,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RAttrs = AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex, AttrBuilder(RAttrs, AttributeSet::ReturnIndex). - removeAttributes(AttributeFuncs::typeIncompatible(NF->getReturnType()))); + removeAttributes(AttributeFuncs:: + typeIncompatible(NF->getReturnType(), + AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex)); if (RAttrs.hasAttributes(AttributeSet::ReturnIndex)) AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs)); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index f56dc95..64cd1bd 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1015,7 +1015,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); - if (RAttrs.hasAttributes(AttributeFuncs::typeIncompatible(NewRetTy))) + if (RAttrs. + hasAttributes(AttributeFuncs:: + typeIncompatible(NewRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex)) return false; // Attribute not compatible with transformed value. } @@ -1045,7 +1048,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). - hasAttributes(AttributeFuncs::typeIncompatible(ParamTy))) + hasAttributes(AttributeFuncs:: + typeIncompatible(ParamTy, i + 1), i + 1)) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1124,7 +1128,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs.removeAttributes(AttributeFuncs::typeIncompatible(NewRetTy)); + RAttrs. + removeAttributes(AttributeFuncs:: + typeIncompatible(NewRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex); // Add the new return attributes. if (RAttrs.hasAttributes()) -- cgit v1.1 From ac72eb264c3a8a15cda81aaead6adc8419058666 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 30 Jan 2013 23:40:31 +0000 Subject: Remove addRetAttributes and addFnAttributes, which aren't useful abstractions. 
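
The consolidation named in this subject shows up in the hunks that follow: instead of one helper per slot (addRetAttributes, addFnAttributes), callers pass the slot index to a single addAttributes. A toy model of the unified interface (the index constants follow the convention visible in the diffs; the container is illustrative):

    #include <cstdint>
    #include <map>

    enum : unsigned { ReturnIndex = 0u, FunctionIndex = ~0u };

    struct AttrSet {
      std::map<unsigned, uint64_t> Slots; // slot index -> raw mask
      // One generic entry point; the removed per-slot helpers were
      // thin wrappers around exactly this.
      AttrSet addAttributes(unsigned Index, uint64_t Bits) const {
        AttrSet R = *this;
        R.Slots[Index] |= Bits;
        return R;
      }
    };
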
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173992 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Core.cpp | 6 +++--- lib/Transforms/IPO/PruneEH.cpp | 8 ++++---- lib/Transforms/Utils/CloneFunction.cpp | 10 ++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 1e3258f..aaf661f 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1383,9 +1383,9 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { const AttributeSet PAL = Func->getAttributes(); AttrBuilder B(PA); const AttributeSet PALnew = - PAL.addFnAttributes(Func->getContext(), - AttributeSet::get(Func->getContext(), - AttributeSet::FunctionIndex, B)); + PAL.addAttributes(Func->getContext(), AttributeSet::FunctionIndex, + AttributeSet::get(Func->getContext(), + AttributeSet::FunctionIndex, B)); Func->setAttributes(PALnew); } diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 98c2602..73d9323 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -147,10 +147,10 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { Function *F = (*I)->getFunction(); const AttributeSet &PAL = F->getAttributes(); const AttributeSet &NPAL = - PAL.addFnAttributes(F->getContext(), - AttributeSet::get(F->getContext(), - AttributeSet::FunctionIndex, - NewAttributes)); + PAL.addAttributes(F->getContext(), AttributeSet::FunctionIndex, + AttributeSet::get(F->getContext(), + AttributeSet::FunctionIndex, + NewAttributes)); if (PAL != NPAL) { MadeChange = true; F->setAttributes(NPAL); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 12311c3..a309bce 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -98,11 +98,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Anew->addAttr(OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() - .addRetAttributes(NewFunc->getContext(), - OldFunc->getAttributes())); + .addAttributes(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldFunc->getAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() - .addFnAttributes(NewFunc->getContext(), - OldFunc->getAttributes())); + .addAttributes(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldFunc->getAttributes())); } -- cgit v1.1 From 9a79b320cb7f179118e69427bc684f2232a24bd9 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 30 Jan 2013 23:43:27 +0000 Subject: PPC QPX requires a 32-byte aligned stack On systems which support the QPX vector instructions, the stack must be 32-byte aligned. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173993 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.h | 3 ++- lib/Target/PowerPC/PPCSubtarget.cpp | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 3517d8c..b1d63ab 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -27,7 +27,8 @@ class PPCFrameLowering: public TargetFrameLowering { public: PPCFrameLowering(const PPCSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + (sti.hasQPX() || sti.isBGQ()) ? 
32 : 16, 0), Subtarget(sti) { } diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 91beeae..18e4c07 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -83,6 +83,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, // Set up darwin-specific properties. if (isDarwin()) HasLazyResolverStubs = true; + + // QPX requires a 32-byte aligned stack. Note that we need to do this if + // we're compiling for a BG/Q system regardless of whether or not QPX + // is enabled because external functions will assume this alignment. + if (hasQPX() || isBGQ()) + StackAlignment = 32; } /// SetJITMode - This is called to inform the subtarget info that we are -- cgit v1.1 From d363ae52995059906d99f7588f47bf891e7db485 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 00:01:45 +0000 Subject: Fix ConstantFold's folding of icmp instructions to recognize that, for example, a one-past-the-end pointer from one global variable may be equal to the base pointer of another global variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/ConstantFold.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 417e0d1..587b7ce 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -1495,9 +1495,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, "Surprising getelementptr!"); return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; } else { - // If they are different globals, we don't know what the value is, - // but they can't be equal. - return ICmpInst::ICMP_NE; + // If they are different globals, we don't know what the value is. + return ICmpInst::BAD_ICMP_PREDICATE; } } } else { @@ -1510,10 +1509,10 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, default: break; case Instruction::GetElementPtr: // By far the most common case to handle is when the base pointers are - // obviously to the same or different globals. + // obviously to the same global. if (isa(CE1Op0) && isa(CE2Op0)) { - if (CE1Op0 != CE2Op0) // Don't know relative ordering, but not equal - return ICmpInst::ICMP_NE; + if (CE1Op0 != CE2Op0) // Don't know relative ordering. + return ICmpInst::BAD_ICMP_PREDICATE; // Ok, we know that both getelementptr instructions are based on the // same global. From this, we can precisely determine the relative // ordering of the resultant pointers. -- cgit v1.1 From f2335dcb17784148c4d8dd23093ab549153cc132 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 00:12:20 +0000 Subject: stripAndComputeConstantOffsets is only called on pointers; check this with an assert instead of failing and requiring callers to check for failure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173998 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d97e226..bc51457 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -665,8 +665,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// no constant offsets applied. 
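
Stepping back to the ConstantFold fix above (r173995), the rule it corrects comes from C's object model: a one-past-the-end pointer into one global may legitimately compare equal to the address of another, so "different globals" cannot be folded to not-equal. A hedged illustration:

    #include <cstdio>

    int A, B; // distinct globals; B may be laid out right after A

    int main() {
      // Folding (&A + 1) == &B to constant false would be wrong: one
      // past the end of A is a valid pointer value that can compare
      // equal to &B, hence BAD_ICMP_PREDICATE above.
      if (&A + 1 == &B)
        std::printf("B directly follows A\n");
      return 0;
    }
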
static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Value *&V) { - if (!V->getType()->isPointerTy()) - return 0; + assert(V->getType()->isPointerTy()); unsigned IntPtrWidth = TD.getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); @@ -701,11 +700,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, static Constant *computePointerDifference(const DataLayout &TD, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. @@ -1710,11 +1705,7 @@ static Constant *computePointerICmp(const DataLayout &TD, } Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. -- cgit v1.1 From 73dee180c836270644dfa7d90f9c5ba877567999 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 00:29:54 +0000 Subject: Make sure that the Attribute object represents one attribute only. Several places were still treating the Attribute object as representing multiple attributes. Those places now use the AttributeSet to represent multiple attributes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174003 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 28 ++++++++++++++++------------ lib/AsmParser/LLParser.h | 8 ++++---- lib/IR/Attributes.cpp | 3 +++ 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index a38f9ea..4910222 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1472,6 +1472,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, if (ParseToken(lltok::lparen, "expected '(' in call")) return true; + unsigned AttrIndex = 1; while (Lex.getKind() != lltok::rparen) { // If this isn't the first argument, we need a comma. if (!ArgList.empty() && @@ -1489,8 +1490,9 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, // Otherwise, handle normal operands. if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS)) return true; - ArgList.push_back(ParamInfo(ArgLoc, V, Attribute::get(V->getContext(), - ArgAttrs))); + ArgList.push_back(ParamInfo(ArgLoc, V, AttributeSet::get(V->getContext(), + AttrIndex++, + ArgAttrs))); } Lex.Lex(); // Lex the ')'. @@ -1539,9 +1541,10 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, if (!FunctionType::isValidArgumentType(ArgTy)) return Error(TypeLoc, "invalid type for function argument"); + unsigned AttrIndex = 1; ArgList.push_back(ArgInfo(TypeLoc, ArgTy, - Attribute::get(ArgTy->getContext(), - Attrs), Name)); + AttributeSet::get(ArgTy->getContext(), + AttrIndex++, Attrs), Name)); while (EatIfPresent(lltok::comma)) { // Handle ... at end of arg list. 
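
computePointerDifference, simplified at the top of this commit, works because two pointers that strip down to the same base differ by a compile-time constant. A standalone sketch of that arithmetic (the types are illustrative):

    #include <cstdint>

    struct Decomposed { const void *Base; int64_t Offset; };

    // (Base + O1) - (Base + O2) == O1 - O2; with different bases there
    // is nothing we can do, as the comment above says.
    bool pointerDifference(Decomposed L, Decomposed R, int64_t &Out) {
      if (L.Base != R.Base)
        return false;
      Out = L.Offset - R.Offset;
      return true;
    }
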
@@ -1568,7 +1571,8 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, return Error(TypeLoc, "invalid type for function argument"); ArgList.push_back(ArgInfo(TypeLoc, ArgTy, - Attribute::get(ArgTy->getContext(), Attrs), + AttributeSet::get(ArgTy->getContext(), + AttrIndex++, Attrs), Name)); } } @@ -1593,7 +1597,7 @@ bool LLParser::ParseFunctionType(Type *&Result) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) return Error(ArgList[i].Loc, "argument name invalid in function type"); - if (ArgList[i].Attrs.hasAttributes()) + if (ArgList[i].Attrs.hasAttributes(i + 1)) return Error(ArgList[i].Loc, "argument attributes invalid in function type"); } @@ -2822,8 +2826,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { ParamTypeList.push_back(ArgList[i].Ty); - if (ArgList[i].Attrs.hasAttributes()) { - AttrBuilder B(ArgList[i].Attrs); + if (ArgList[i].Attrs.hasAttributes(i + 1)) { + AttrBuilder B(ArgList[i].Attrs, i + 1); Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); } } @@ -3382,8 +3386,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs.hasAttributes()) { - AttrBuilder B(ArgList[i].Attrs); + if (ArgList[i].Attrs.hasAttributes(i + 1)) { + AttrBuilder B(ArgList[i].Attrs, i + 1); Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); } } @@ -3784,8 +3788,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs.hasAttributes()) { - AttrBuilder B(ArgList[i].Attrs); + if (ArgList[i].Attrs.hasAttributes(i + 1)) { + AttrBuilder B(ArgList[i].Attrs, i + 1); Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); } } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index f255897..d8de779 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -326,8 +326,8 @@ namespace llvm { struct ParamInfo { LocTy Loc; Value *V; - Attribute Attrs; - ParamInfo(LocTy loc, Value *v, Attribute attrs) + AttributeSet Attrs; + ParamInfo(LocTy loc, Value *v, AttributeSet attrs) : Loc(loc), V(v), Attrs(attrs) {} }; bool ParseParameterList(SmallVectorImpl &ArgList, @@ -347,9 +347,9 @@ namespace llvm { struct ArgInfo { LocTy Loc; Type *Ty; - Attribute Attrs; + AttributeSet Attrs; std::string Name; - ArgInfo(LocTy L, Type *ty, Attribute Attr, const std::string &N) + ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N) : Loc(L), Ty(ty), Attrs(Attr), Name(N) {} }; bool ParseArgumentList(SmallVectorImpl &ArgList, bool &isVarArg); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 75ba93a..98c12b5 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -40,6 +40,9 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { if (!B.hasAttributes()) return Attribute(); + assert(std::distance(B.begin(), B.end()) == 1 && + "The Attribute object should represent one attribute only!"); + // Otherwise, build a key to look up the existing attributes. 
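
The parser hunks above thread an explicit, incrementing index through each argument because attribute slots are positional: slot 0 is the return value and parameters count from 1, which is why AttrIndex starts at 1 and other call sites use i + 1. A toy check of the convention:

    #include <cassert>

    unsigned slotForParam(unsigned ParamNo) { // ParamNo is 0-based
      return ParamNo + 1; // slot 0 is reserved for the return value
    }

    int main() {
      assert(slotForParam(0) == 1); // first parameter lives in slot 1
      return 0;
    }
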
LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; -- cgit v1.1 From 5f1686f0b0c38b8b9482fbfec21bf5138c27bd0a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 00:32:11 +0000 Subject: Minor code simplification. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174005 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index bc51457..9f2aa59 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2459,7 +2459,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. - if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy()) + if (Q.TD && LHS->getType()->isPointerTy()) if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) return C; -- cgit v1.1 From a9bd4b4647694f1384a8e1c891931ba7bdff364f Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 31 Jan 2013 00:50:46 +0000 Subject: Check and allow floating point registers to select the size of the register for inline asm. This conforms to how gcc allows for effective casting of inputs into gprs (fprs is already handled). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 36d1ad4..c34010c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18170,7 +18170,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = &X86::GR8RegClass; } - } else if (VT == MVT::i32) { + } else if (VT == MVT::i32 || VT == MVT::f32) { unsigned DestReg = 0; switch (Res.first) { default: break; @@ -18187,7 +18187,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = &X86::GR32RegClass; } - } else if (VT == MVT::i64) { + } else if (VT == MVT::i64 || VT == MVT::f64) { unsigned DestReg = 0; switch (Res.first) { default: break; -- cgit v1.1 From e187e259963466cc106b8a23c0d5b4d44fdd15a6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 31 Jan 2013 00:50:48 +0000 Subject: Whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c34010c..04237e7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12038,17 +12038,17 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); - + // For MacOSX, we want to call an alternative entry point: __sincos_stret, // which returns the values in two XMM registers. 
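
The one-line simplification above (r174005) leans on an IR invariant: both operands of an icmp always have the same type, so testing the LHS type alone suffices. A hedged model of that reasoning:

    #include <cassert>

    enum TypeKind { IntTy, PtrTy };

    // icmp requires matching operand types, so one check covers both.
    bool bothArePointers(TypeKind LHS, TypeKind RHS) {
      assert(LHS == RHS && "icmp operands must have the same type");
      return LHS == PtrTy;
    }
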
DebugLoc dl = Op.getDebugLoc(); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - + ArgListTy Args; ArgListEntry Entry; - + Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; @@ -16512,8 +16512,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + EVT WideVecVT = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), loadRegZize/MemVT.getScalarType().getSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && @@ -17199,8 +17199,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Helper function of PerformSETCCCombine. It is to materialize "setb reg" -// as "sbb reg,reg", since it can be extended without zext and produces +// Helper function of PerformSETCCCombine. It is to materialize "setb reg" +// as "sbb reg,reg", since it can be extended without zext and produces // an all-ones bit which is more useful than 0/1 in some cases. static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) { return DAG.getNode(ISD::AND, DL, MVT::i8, @@ -17218,13 +17218,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, SDValue EFLAGS = N->getOperand(1); if (CC == X86::COND_A) { - // Try to convert COND_A into COND_B in an attempt to facilitate + // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // // Do not flip "e > c", where "c" is a constant, because Cmp instruction // cannot take an immediate as its first operand. // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() && !isa(EFLAGS.getOperand(1))) { SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(), -- cgit v1.1 From c6077eff8b9963b59de21e5d1dfae7a6ea75da36 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 00:53:21 +0000 Subject: Remove the AttrBuilder version of the Attribute::get function. The AttrBuilder is there to build up multiple attributes. The Attribute class represents only one attribute at a time. So remove this unnecessary builder creator method. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 31 ++++++++++++------------------- lib/IR/Verifier.cpp | 4 +++- 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 98c12b5..59e86f0 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -30,24 +30,15 @@ using namespace llvm; // Attribute Construction Methods //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { - AttrBuilder B; - return Attribute::get(Context, B.addAttribute(Kind)); -} - -Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { - // If there are no attributes, return an empty Attribute class. 
- if (!B.hasAttributes()) - return Attribute(); - - assert(std::distance(B.begin(), B.end()) == 1 && - "The Attribute object should represent one attribute only!"); +Attribute Attribute::get(LLVMContext &Context, AttrKind Kind, + Constant *Val) { + if (Kind == None) return Attribute(); // Otherwise, build a key to look up the existing attributes. LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ConstantInt *CI = ConstantInt::get(Type::getInt64Ty(Context), B.Raw()); - ID.AddPointer(CI); + ID.AddInteger(Kind); + ID.AddPointer(Val); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -55,7 +46,9 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributeImpl(Context, CI); + PA = (!Val) ? + new AttributeImpl(Context, Kind) : + new AttributeImpl(Context, Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -64,14 +57,14 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { } Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { - AttrBuilder B; - return get(Context, B.addAlignmentAttr(Align)); + return get(Context, Attribute::Alignment, + ConstantInt::get(Type::getInt64Ty(Context), Align)); } Attribute Attribute::getWithStackAlignment(LLVMContext &Context, uint64_t Align) { - AttrBuilder B; - return get(Context, B.addStackAlignmentAttr(Align)); + return get(Context, Attribute::StackAlignment, + ConstantInt::get(Type::getInt64Ty(Context), Align)); } //===----------------------------------------------------------------------===// diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5da7448..babc295 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -745,7 +745,9 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); NotFn.removeFunctionOnlyAttrs(); Assert1(!NotFn.hasAttributes(), "Attribute '" + - Attribute::get(V->getContext(), NotFn).getAsString() + + AttributeSet::get(V->getContext(), + AttributeSet::FunctionIndex, + NotFn).getAsString(AttributeSet::FunctionIndex) + "' do not apply to the function!", V); // Check for mutually incompatible attributes. -- cgit v1.1 From b96129dd4856a5473c52daceaabdfd2262bd96f2 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 01:04:51 +0000 Subject: Revert for now: --- Reverse-merging r174010 into '.': U include/llvm/IR/Attributes.h U lib/IR/Verifier.cpp U lib/IR/Attributes.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174012 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 31 +++++++++++++++++++------------ lib/IR/Verifier.cpp | 4 +--- 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 59e86f0..98c12b5 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -30,15 +30,24 @@ using namespace llvm; // Attribute Construction Methods //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, AttrKind Kind, - Constant *Val) { - if (Kind == None) return Attribute(); +Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { + AttrBuilder B; + return Attribute::get(Context, B.addAttribute(Kind)); +} + +Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { + // If there are no attributes, return an empty Attribute class. 
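
Both shapes of Attribute::get above follow the same uniquing discipline: build a key, probe a folding set, and allocate only on a miss, so equal attributes share one impl and compare by pointer. A standalone model of the pattern, with std::map standing in for the FoldingSet:

    #include <cstdint>
    #include <map>

    struct AttrImpl { uint64_t Key; };

    AttrImpl *getUniqued(std::map<uint64_t, AttrImpl *> &Table,
                         uint64_t Key) {
      AttrImpl *&Slot = Table[Key];
      if (!Slot)
        Slot = new AttrImpl{Key}; // miss: create and remember
      return Slot;                // hit: share the existing object
    }
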
+ if (!B.hasAttributes()) + return Attribute(); + + assert(std::distance(B.begin(), B.end()) == 1 && + "The Attribute object should represent one attribute only!"); // Otherwise, build a key to look up the existing attributes. LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ID.AddInteger(Kind); - ID.AddPointer(Val); + ConstantInt *CI = ConstantInt::get(Type::getInt64Ty(Context), B.Raw()); + ID.AddPointer(CI); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -46,9 +55,7 @@ Attribute Attribute::get(LLVMContext &Context, AttrKind Kind, if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = (!Val) ? - new AttributeImpl(Context, Kind) : - new AttributeImpl(Context, Kind, Val); + PA = new AttributeImpl(Context, CI); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -57,14 +64,14 @@ Attribute Attribute::get(LLVMContext &Context, AttrKind Kind, } Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { - return get(Context, Attribute::Alignment, - ConstantInt::get(Type::getInt64Ty(Context), Align)); + AttrBuilder B; + return get(Context, B.addAlignmentAttr(Align)); } Attribute Attribute::getWithStackAlignment(LLVMContext &Context, uint64_t Align) { - return get(Context, Attribute::StackAlignment, - ConstantInt::get(Type::getInt64Ty(Context), Align)); + AttrBuilder B; + return get(Context, B.addStackAlignmentAttr(Align)); } //===----------------------------------------------------------------------===// diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index babc295..5da7448 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -745,9 +745,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); NotFn.removeFunctionOnlyAttrs(); Assert1(!NotFn.hasAttributes(), "Attribute '" + - AttributeSet::get(V->getContext(), - AttributeSet::FunctionIndex, - NotFn).getAsString(AttributeSet::FunctionIndex) + + Attribute::get(V->getContext(), NotFn).getAsString() + "' do not apply to the function!", V); // Check for mutually incompatible attributes. -- cgit v1.1 From 8ed701da9aa388c078cc6aecac2fe355974c90d2 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 01:51:27 +0000 Subject: Make the AttrBuilder creation method of Attribute private so that people won't use it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174023 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5da7448..babc295 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -745,7 +745,9 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); NotFn.removeFunctionOnlyAttrs(); Assert1(!NotFn.hasAttributes(), "Attribute '" + - Attribute::get(V->getContext(), NotFn).getAsString() + + AttributeSet::get(V->getContext(), + AttributeSet::FunctionIndex, + NotFn).getAsString(AttributeSet::FunctionIndex) + "' do not apply to the function!", V); // Check for mutually incompatible attributes. 
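
Attribute hunks throughout this series decode alignment as 1ULL << ((Mask >> 16) - 1): the raw attribute word stores alignment as a biased log2 in a bit-field rather than the byte count itself. A toy encode/decode pair (the shift of 16 matches the diffs; the bias-by-one convention is inferred from the decode formula):

    #include <cassert>
    #include <cstdint>

    const unsigned AlignShift = 16;

    uint64_t encodeAlign(uint64_t Align) { // Align is a power of two
      unsigned Log2 = 0;
      while ((1ULL << Log2) < Align) ++Log2;
      return uint64_t(Log2 + 1) << AlignShift; // biased: 0 = absent
    }

    uint64_t decodeAlign(uint64_t Mask) {
      return 1ULL << ((Mask >> AlignShift) - 1);
    }

    int main() {
      assert(decodeAlign(encodeAlign(32)) == 32);
      return 0;
    }
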
-- cgit v1.1 From a070d2a0355c4993240b5206ebc1d517c151331d Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 02:00:45 +0000 Subject: Change GetPointerBaseWithConstantOffset's DataLayout argument from a reference to a pointer, so that it can handle the case where DataLayout is not available and behave conservatively. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/Lint.cpp | 84 +++++++++++++------------- lib/Analysis/Loads.cpp | 3 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +- lib/Analysis/ValueTracking.cpp | 8 ++- lib/Transforms/Scalar/DeadStoreElimination.cpp | 6 +- lib/Transforms/Scalar/GVN.cpp | 6 +- 6 files changed, 55 insertions(+), 56 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index fd10a6b..9393508 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -412,51 +412,49 @@ void Lint::visitMemoryReference(Instruction &I, } // Check for buffer overflows and misalignment. - if (TD) { - // Only handles memory references that read/write something simple like an - // alloca instruction or a global variable. - int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { - // OK, so the access is to a constant offset from Ptr. Check that Ptr is - // something we can handle and if so extract the size of this base object - // along with its alignment. - uint64_t BaseSize = AliasAnalysis::UnknownSize; - unsigned BaseAlign = 0; - - if (AllocaInst *AI = dyn_cast(Base)) { - Type *ATy = AI->getAllocatedType(); - if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = TD->getTypeAllocSize(ATy); - BaseAlign = AI->getAlignment(); - if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = TD->getABITypeAlignment(ATy); - } else if (GlobalVariable *GV = dyn_cast(Base)) { - // If the global may be defined differently in another compilation unit - // then don't warn about funky memory accesses. - if (GV->hasDefinitiveInitializer()) { - Type *GTy = GV->getType()->getElementType(); - if (GTy->isSized()) - BaseSize = TD->getTypeAllocSize(GTy); - BaseAlign = GV->getAlignment(); - if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = TD->getABITypeAlignment(GTy); - } + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast(Base)) { + Type *ATy = AI->getAllocatedType(); + if (TD && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (TD && BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. 
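
Changing the DataLayout argument from a reference to a pointer, as this commit does, is a recurring idiom: an analysis should degrade gracefully rather than bail out when target data is unavailable. A hedged sketch of the pattern:

    struct DataLayout { unsigned PointerBits; };

    // With no target data we cannot size pointers precisely, so fall
    // back to the widest width handled (64), exactly what the
    // ValueTracking hunk below does.
    unsigned offsetWidth(const DataLayout *TD) {
      return TD ? TD->PointerBits : 64;
    }
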
+ if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (TD && GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (TD && BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); } - - // Accesses from before the start or after the end of the object are not - // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); - - // Accesses that say that the memory is more aligned than it is are not - // defined. - if (Align == 0 && Ty && Ty->isSized()) - Align = TD->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); } + + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. + if (TD && Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 3158873..0902a39 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -57,8 +57,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align, const DataLayout *TD) { int64_t ByteOffset = 0; Value *Base = V; - if (TD) - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, *TD); + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); if (ByteOffset < 0) // out of bounds return false; diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index eee7607..5cb0016 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -262,7 +262,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, // If we haven't already computed the base/offset of MemLoc, do so now. if (MemLocBase == 0) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, @@ -287,7 +287,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // Get the base of this load. int64_t LIOffs = 0; const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); // If the two pointers are not based on the same pointer, we can't tell that // they are related. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 23bc444..473ebc8 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1671,8 +1671,10 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. 
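
GetPointerBaseWithConstantOffset, defined just below, repeatedly looks through GEPs and bitcasts while accumulating a constant byte offset; whatever it cannot see through becomes the base. A simplified standalone model of the walk:

    #include <cstdint>

    struct Node {
      const Node *Inner;   // operand we can look through, or null
      int64_t ConstOffset; // constant bytes this step adds
    };

    const Node *stripOffsets(const Node *P, int64_t &Offset) {
      while (P->Inner) {   // mirrors the while(1) loop below
        Offset += P->ConstOffset;
        P = P->Inner;
      }
      return P;            // the base: the node we stopped at
    }
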
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &TD) { - unsigned BitWidth = TD.getPointerSizeInBits(); + const DataLayout *TD) { + // Without DataLayout, conservatively assume 64-bit offsets, which is + // the widest we support. + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; APInt ByteOffset(BitWidth, 0); while (1) { if (Ptr->getType()->isVectorTy()) @@ -1680,7 +1682,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, if (GEPOperator *GEP = dyn_cast(Ptr)) { APInt GEPOffset(BitWidth, 0); - if (!GEP->accumulateConstantOffset(TD, GEPOffset)) + if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset)) break; ByteOffset += GEPOffset; Ptr = GEP->getPointerOperand(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index fe3acbf..57432c7 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -376,10 +376,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any // other store to the same object. - const DataLayout &TD = *AA.getDataLayout(); + const DataLayout *TD = AA.getDataLayout(); - const Value *UO1 = GetUnderlyingObject(P1, &TD), - *UO2 = GetUnderlyingObject(P2, &TD); + const Value *UO1 = GetUnderlyingObject(P1, TD), + *UO2 = GetUnderlyingObject(P2, TD); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 14201b9..50c4714 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -849,8 +849,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD); if (StoreBase != LoadBase) return -1; @@ -945,7 +945,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // then we should widen it! int64_t LoadOffs = 0; const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); unsigned Size = MemoryDependenceAnalysis:: -- cgit v1.1 From 85238aae1a4b6ac67e16560a9855944191f3db5b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 02:18:19 +0000 Subject: Remove Attribute::hasAttributes() and make Attribute::hasAttribute() private. The Attribute::hasAttributes() is kind of meaningless since an Attribute can have only one attribute. And we would rather people use the 'operator==' instead of Attribute::hasAttribute(). 
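
One subtlety behind preferring operator== is visible in the diff below: an Attribute with a null impl means "no attribute", so it must compare equal to Attribute::None, hence the added (!pImpl && K == None) arm. A toy version of that null-object equality:

    struct Attr {
      enum Kind { None, NoReturn };
      const int *pImpl = nullptr; // null: no attribute present
      Kind K = None;
      bool operator==(Kind RHS) const {
        return (pImpl && K == RHS) || (!pImpl && RHS == None);
      }
    };
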
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174026 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 1 - lib/IR/Attributes.cpp | 20 ++++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 7be5a16..af9d4fa 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -46,7 +46,6 @@ public: AttributeImpl(LLVMContext &C, StringRef data); bool hasAttribute(Attribute::AttrKind A) const; - bool hasAttributes() const; Constant *getAttributeKind() const { return Kind; } ArrayRef getAttributeValues() const { return Vals; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 98c12b5..2c84a3d 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -82,10 +82,6 @@ bool Attribute::hasAttribute(AttrKind Val) const { return pImpl && pImpl->hasAttribute(Val); } -bool Attribute::hasAttributes() const { - return pImpl && pImpl->hasAttributes(); -} - Constant *Attribute::getAttributeKind() const { return pImpl ? pImpl->getAttributeKind() : 0; } @@ -185,7 +181,7 @@ std::string Attribute::getAsString() const { } bool Attribute::operator==(AttrKind K) const { - return pImpl && *pImpl == K; + return (pImpl && *pImpl == K) || (!pImpl && K == None); } bool Attribute::operator!=(AttrKind K) const { return !(*this == K); @@ -226,10 +222,6 @@ bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { return (Raw() & getAttrMask(A)) != 0; } -bool AttributeImpl::hasAttributes() const { - return Raw() != 0; -} - uint64_t AttributeImpl::getAlignment() const { uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); return 1ULL << ((Mask >> 16) - 1); @@ -369,7 +361,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) - if (I->hasAttribute(Kind)) + if (*I == Kind) return true; return false; } @@ -377,7 +369,7 @@ bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { unsigned AttributeSetNode::getAlignment() const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) - if (I->hasAttribute(Attribute::Alignment)) + if (*I == Attribute::Alignment) return I->getAlignment(); return 0; } @@ -385,7 +377,7 @@ unsigned AttributeSetNode::getAlignment() const { unsigned AttributeSetNode::getStackAlignment() const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) - if (I->hasAttribute(Attribute::StackAlignment)) + if (*I == Attribute::StackAlignment) return I->getStackAlignment(); return 0; } @@ -454,7 +446,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { assert((!i || Attrs[i-1].first <= Attrs[i].first) && "Misordered Attributes list!"); - assert(Attrs[i].second.hasAttributes() && + assert(Attrs[i].second != Attribute::None && "Pointless attribute!"); } #endif @@ -682,7 +674,7 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) for (AttributeSetImpl::const_iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) - if (II->hasAttribute(Attr)) + if (*II == Attr) return true; return false; -- cgit v1.1 From de0eb19248f3053c07a5b1dad9c47b8435458337 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 02:40:59 +0000 Subject: Move isKnownNonNull out of 
AliasAnalysis.h and into ValueTracking.cpp since it isn't really an AliasAnalysis concept, and ValueTracking has similar things that it could plausibly share code with some day. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174027 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/AliasAnalysis.cpp | 16 ---------------- lib/Analysis/LazyValueInfo.cpp | 1 - lib/Analysis/ValueTracking.cpp | 16 ++++++++++++++++ 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index f32bd70..210b80a 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -555,19 +555,3 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } - -/// isKnownNonNull - Return true if we know that the specified value is never -/// null. -bool llvm::isKnownNonNull(const Value *V) { - // Alloca never returns null, malloc might. - if (isa(V)) return true; - - // A byval argument is never null. - if (const Argument *A = dyn_cast(V)) - return A->hasByValAttr(); - - // Global values are not null unless extern weak. - if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 1c94d10..66b5e85 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 473ebc8..8e3994e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2014,3 +2014,19 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; // Misc instructions which have effects } } + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} -- cgit v1.1 From 819f9d6bf91c439967ef623d0c047e7f672683fa Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 02:45:26 +0000 Subject: Add a comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174028 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 9f2aa59..18d90b6 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -663,6 +663,10 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. +/// +/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't +/// follow non-inbounds geps. 
This allows it to remain usable for icmp ult/etc. +/// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Value *&V) { assert(V->getType()->isPointerTy()); -- cgit v1.1 From 3e3de565e9c7258fb97773b3a64fc091355cb2de Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 31 Jan 2013 02:50:36 +0000 Subject: Change stripAndComputeConstantOffsets to accept a NULL DataLayout pointer as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174030 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 18d90b6..d5e38e5 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -667,11 +667,16 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. -static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, +static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, Value *&V) { assert(V->getType()->isPointerTy()); - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + // Without DataLayout, just be conservative for now. Theoretically, more could + // be done in this case. + if (!TD) + return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -680,7 +685,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(TD, Offset)) + if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -695,13 +700,13 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD.getIntPtrType(V->getContext()); + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); return ConstantInt::get(IntPtrTy, Offset); } /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. -static Constant *computePointerDifference(const DataLayout &TD, +static Constant *computePointerDifference(const DataLayout *TD, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); @@ -818,9 +823,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return W; // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). - if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) - if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + if (Constant *Result = computePointerDifference(Q.TD, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // Mul distributes over Sub. Try some generic simplifications based on this. 
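
The comment added just above states a real soundness condition: only inbounds GEPs promise that the address arithmetic stays inside one object, with no wrapping, and that is what makes stripping them safe when folding unsigned comparisons such as icmp ult. A hedged arithmetic illustration:

    #include <cstdint>

    // With the no-wrap guarantee, Base + Off >= Base always holds, so
    // a comparison against the stripped base can be folded; if the
    // addition may wrap, the fold would be wrong.
    bool ultFoldSound(uint64_t Base, uint64_t Off) {
      return Base + Off >= Base;
    }
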
@@ -1683,7 +1688,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } -static Constant *computePointerICmp(const DataLayout &TD, +static Constant *computePointerICmp(const DataLayout *TD, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { // We can only fold certain predicates on pointer comparisons. @@ -2463,8 +2468,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. - if (Q.TD && LHS->getType()->isPointerTy()) - if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) + if (LHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(Q.TD, Pred, LHS, RHS)) return C; if (GetElementPtrInst *GLHS = dyn_cast(LHS)) { -- cgit v1.1 From eddab1550ee10cce3bb26a26e88529cb19451aa3 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 31 Jan 2013 03:47:28 +0000 Subject: Revert r174026, "Remove Attribute::hasAttributes() and make Attribute::hasAttribute() private." It broke many hosts to crash. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174035 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 1 + lib/IR/Attributes.cpp | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index af9d4fa..7be5a16 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -46,6 +46,7 @@ public: AttributeImpl(LLVMContext &C, StringRef data); bool hasAttribute(Attribute::AttrKind A) const; + bool hasAttributes() const; Constant *getAttributeKind() const { return Kind; } ArrayRef getAttributeValues() const { return Vals; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 2c84a3d..98c12b5 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -82,6 +82,10 @@ bool Attribute::hasAttribute(AttrKind Val) const { return pImpl && pImpl->hasAttribute(Val); } +bool Attribute::hasAttributes() const { + return pImpl && pImpl->hasAttributes(); +} + Constant *Attribute::getAttributeKind() const { return pImpl ? 
pImpl->getAttributeKind() : 0; } @@ -181,7 +185,7 @@ std::string Attribute::getAsString() const { } bool Attribute::operator==(AttrKind K) const { - return (pImpl && *pImpl == K) || (!pImpl && K == None); + return pImpl && *pImpl == K; } bool Attribute::operator!=(AttrKind K) const { return !(*this == K); @@ -222,6 +226,10 @@ bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { return (Raw() & getAttrMask(A)) != 0; } +bool AttributeImpl::hasAttributes() const { + return Raw() != 0; +} + uint64_t AttributeImpl::getAlignment() const { uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); return 1ULL << ((Mask >> 16) - 1); @@ -361,7 +369,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) - if (*I == Kind) + if (I->hasAttribute(Kind)) return true; return false; } @@ -369,7 +377,7 @@ bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { unsigned AttributeSetNode::getAlignment() const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) - if (*I == Attribute::Alignment) + if (I->hasAttribute(Attribute::Alignment)) return I->getAlignment(); return 0; } @@ -377,7 +385,7 @@ unsigned AttributeSetNode::getAlignment() const { unsigned AttributeSetNode::getStackAlignment() const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) - if (*I == Attribute::StackAlignment) + if (I->hasAttribute(Attribute::StackAlignment)) return I->getStackAlignment(); return 0; } @@ -446,7 +454,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { assert((!i || Attrs[i-1].first <= Attrs[i].first) && "Misordered Attributes list!"); - assert(Attrs[i].second != Attribute::None && + assert(Attrs[i].second.hasAttributes() && "Pointless attribute!"); } #endif @@ -674,7 +682,7 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) for (AttributeSetImpl::const_iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) - if (*II == Attr) + if (II->hasAttribute(Attr)) return true; return false; -- cgit v1.1 From 82aea644c6d2a21464958fe2e3a1cd0f6bfc82db Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 06:22:35 +0000 Subject: Remove the Attribute::hasAttributes() function. That function doesn't make sense anymore because there's only one attribute per Attribute object now. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 1 - lib/IR/Attributes.cpp | 10 +--------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 7be5a16..af9d4fa 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -46,7 +46,6 @@ public: AttributeImpl(LLVMContext &C, StringRef data); bool hasAttribute(Attribute::AttrKind A) const; - bool hasAttributes() const; Constant *getAttributeKind() const { return Kind; } ArrayRef getAttributeValues() const { return Vals; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 98c12b5..3a8cfe5 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -82,10 +82,6 @@ bool Attribute::hasAttribute(AttrKind Val) const { return pImpl && pImpl->hasAttribute(Val); } -bool Attribute::hasAttributes() const { - return pImpl && pImpl->hasAttributes(); -} - Constant *Attribute::getAttributeKind() const { return pImpl ? pImpl->getAttributeKind() : 0; } @@ -226,10 +222,6 @@ bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { return (Raw() & getAttrMask(A)) != 0; } -bool AttributeImpl::hasAttributes() const { - return Raw() != 0; -} - uint64_t AttributeImpl::getAlignment() const { uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); return 1ULL << ((Mask >> 16) - 1); @@ -454,7 +446,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { assert((!i || Attrs[i-1].first <= Attrs[i].first) && "Misordered Attributes list!"); - assert(Attrs[i].second.hasAttributes() && + assert(Attrs[i].second != Attribute::None && "Pointless attribute!"); } #endif -- cgit v1.1 From e22df330a344ddbd536e6bcbc542290953ab4a9d Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 31 Jan 2013 08:02:11 +0000 Subject: Revert r173946. This breaks compilation of googletest with Clang git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174048 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/ValueMapper.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index b9e4ddf..a5e1643 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -63,23 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0) continue; - Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper); - // If Mapped_Op is null, we should use indentity map. - if (Mapped_OP == OP || Mapped_OP == 0) continue; + if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; // Ok, at least one operand needs remapping. SmallVector Elts; Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - if (Op == 0) - Elts.push_back(0); - else { - Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper); - // If Mapped_Op is null, we should use indentity map. - Elts.push_back(Mapped_Op ? Mapped_Op : Op); - } + Elts.push_back(Op ? 
MapValue(Op, VM, Flags, TypeMapper) : 0); } MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); -- cgit v1.1 From ea2d8780e9c78628fe5e3312ca4c17c054156c83 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Thu, 31 Jan 2013 09:58:59 +0000 Subject: Annotate BumpPtrAllocator for MemorySanitizer. This change adds MemorySanitizer annotations to BumpPtrAllocator to improve report quality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174051 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Allocator.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 28f4e64..b4fdc1e 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Recycler.h" @@ -102,6 +103,10 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { // Check if we can hold it. if (Ptr + Size <= End) { CurPtr = Ptr + Size; + // Update the allocation point of this memory block in MemorySanitizer. + // Without this, MemorySanitizer reports for values originating from it will + // point to the allocation point of the entire slab. + __msan_allocated_memory(Ptr, Size); return Ptr; } @@ -117,6 +122,7 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { Ptr = AlignPtr((char*)(NewSlab + 1), Alignment); assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size); + __msan_allocated_memory(Ptr, Size); return Ptr; } @@ -125,6 +131,7 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { Ptr = AlignPtr(CurPtr, Alignment); CurPtr = Ptr + Size; assert(CurPtr <= End && "Unable to allocate memory!"); + __msan_allocated_memory(Ptr, Size); return Ptr; } -- cgit v1.1 From 72062f5744557e270a38192554c3126ea5f97434 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 31 Jan 2013 12:12:40 +0000 Subject: Add AArch64 as an experimental target. This patch adds support for AArch64 (ARM's 64-bit architecture) to LLVM in the "experimental" category. Currently, it won't be built unless requested explicitly. This initial commit should have support for: + Assembly of all scalar (i.e. non-NEON, non-Crypto) instructions (except the late addition CRC instructions). + CodeGen features required for C++03 and C99. + Compilation for the "small" memory model: code+static data < 4GB. + Absolute and position-independent code. + GNU-style (i.e. "__thread") TLS. + Debugging information. The principal omission, currently, is performance tuning. This patch excludes the NEON support also reviewed due to an outbreak of batshit insanity in our legal department. That will be committed soon bringing the changes to precisely what has been approved. Further reviews would be gratefully received. 
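As a quick orientation before the diffstat: once the backend is registered, "aarch64" becomes a first-class, 64-bit architecture in llvm::Triple (see the Triple.cpp hunk below). A minimal usage sketch, assuming a tree with this patch applied; the triple string itself is just an example:

  #include "llvm/ADT/Triple.h"
  #include <cassert>

  int main() {
    llvm::Triple T("aarch64-none-linux-gnu");
    assert(T.getArch() == llvm::Triple::aarch64); // recognised by parseArch below
    assert(T.isArch64Bit()); // getArchPointerBitWidth reports 64 for aarch64
    return 0;
  }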
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174054 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCELFStreamer.cpp | 4 +- lib/MC/MCObjectFileInfo.cpp | 19 + lib/Support/Triple.cpp | 8 + lib/Target/AArch64/AArch64.h | 42 + lib/Target/AArch64/AArch64.td | 68 + lib/Target/AArch64/AArch64AsmPrinter.cpp | 361 ++ lib/Target/AArch64/AArch64AsmPrinter.h | 85 + lib/Target/AArch64/AArch64CallingConv.td | 196 + lib/Target/AArch64/AArch64ConstantIslandPass.cpp | 1420 ++++++ lib/Target/AArch64/AArch64FrameLowering.cpp | 644 +++ lib/Target/AArch64/AArch64FrameLowering.h | 103 + lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 422 ++ lib/Target/AArch64/AArch64ISelLowering.cpp | 2957 +++++++++++ lib/Target/AArch64/AArch64ISelLowering.h | 247 + lib/Target/AArch64/AArch64InstrFormats.td | 1011 ++++ lib/Target/AArch64/AArch64InstrInfo.cpp | 805 +++ lib/Target/AArch64/AArch64InstrInfo.h | 110 + lib/Target/AArch64/AArch64InstrInfo.td | 5298 ++++++++++++++++++++ lib/Target/AArch64/AArch64MCInstLower.cpp | 140 + lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 14 + lib/Target/AArch64/AArch64MachineFunctionInfo.h | 158 + lib/Target/AArch64/AArch64RegisterInfo.cpp | 211 + lib/Target/AArch64/AArch64RegisterInfo.h | 79 + lib/Target/AArch64/AArch64RegisterInfo.td | 205 + lib/Target/AArch64/AArch64Schedule.td | 10 + lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 25 + lib/Target/AArch64/AArch64SelectionDAGInfo.h | 32 + lib/Target/AArch64/AArch64Subtarget.cpp | 43 + lib/Target/AArch64/AArch64Subtarget.h | 54 + lib/Target/AArch64/AArch64TargetMachine.cpp | 78 + lib/Target/AArch64/AArch64TargetMachine.h | 69 + lib/Target/AArch64/AArch64TargetObjectFile.cpp | 19 + lib/Target/AArch64/AArch64TargetObjectFile.h | 27 + lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2025 ++++++++ lib/Target/AArch64/AsmParser/CMakeLists.txt | 7 + lib/Target/AArch64/AsmParser/LLVMBuild.txt | 24 + lib/Target/AArch64/AsmParser/Makefile | 15 + lib/Target/AArch64/CMakeLists.txt | 35 + .../AArch64/Disassembler/AArch64Disassembler.cpp | 791 +++ lib/Target/AArch64/Disassembler/CMakeLists.txt | 7 + lib/Target/AArch64/Disassembler/LLVMBuild.txt | 24 + lib/Target/AArch64/Disassembler/Makefile | 16 + .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 408 ++ .../AArch64/InstPrinter/AArch64InstPrinter.h | 171 + lib/Target/AArch64/InstPrinter/CMakeLists.txt | 8 + lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 24 + lib/Target/AArch64/InstPrinter/Makefile | 15 + lib/Target/AArch64/LLVMBuild.txt | 36 + .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 580 +++ lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h | 779 +++ .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 287 ++ .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 160 + .../AArch64/MCTargetDesc/AArch64ELFStreamer.h | 27 + .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 108 + .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 41 + lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 27 + .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 517 ++ lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 173 + lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 161 + .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 991 ++++ .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 65 + lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 13 + lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 24 + lib/Target/AArch64/MCTargetDesc/Makefile | 16 + lib/Target/AArch64/Makefile | 30 + lib/Target/AArch64/README.txt | 2 + .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 20 + lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7 + 
lib/Target/AArch64/TargetInfo/LLVMBuild.txt | 24 + lib/Target/AArch64/TargetInfo/Makefile | 15 + lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 3 + lib/Target/LLVMBuild.txt | 2 +- 72 files changed, 22640 insertions(+), 2 deletions(-) create mode 100644 lib/Target/AArch64/AArch64.h create mode 100644 lib/Target/AArch64/AArch64.td create mode 100644 lib/Target/AArch64/AArch64AsmPrinter.cpp create mode 100644 lib/Target/AArch64/AArch64AsmPrinter.h create mode 100644 lib/Target/AArch64/AArch64CallingConv.td create mode 100644 lib/Target/AArch64/AArch64ConstantIslandPass.cpp create mode 100644 lib/Target/AArch64/AArch64FrameLowering.cpp create mode 100644 lib/Target/AArch64/AArch64FrameLowering.h create mode 100644 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp create mode 100644 lib/Target/AArch64/AArch64ISelLowering.cpp create mode 100644 lib/Target/AArch64/AArch64ISelLowering.h create mode 100644 lib/Target/AArch64/AArch64InstrFormats.td create mode 100644 lib/Target/AArch64/AArch64InstrInfo.cpp create mode 100644 lib/Target/AArch64/AArch64InstrInfo.h create mode 100644 lib/Target/AArch64/AArch64InstrInfo.td create mode 100644 lib/Target/AArch64/AArch64MCInstLower.cpp create mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.cpp create mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.h create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.cpp create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.h create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.td create mode 100644 lib/Target/AArch64/AArch64Schedule.td create mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.cpp create mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.h create mode 100644 lib/Target/AArch64/AArch64Subtarget.cpp create mode 100644 lib/Target/AArch64/AArch64Subtarget.h create mode 100644 lib/Target/AArch64/AArch64TargetMachine.cpp create mode 100644 lib/Target/AArch64/AArch64TargetMachine.h create mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.cpp create mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.h create mode 100644 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp create mode 100644 lib/Target/AArch64/AsmParser/CMakeLists.txt create mode 100644 lib/Target/AArch64/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/AArch64/AsmParser/Makefile create mode 100644 lib/Target/AArch64/CMakeLists.txt create mode 100644 lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp create mode 100644 lib/Target/AArch64/Disassembler/CMakeLists.txt create mode 100644 lib/Target/AArch64/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/AArch64/Disassembler/Makefile create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h create mode 100644 lib/Target/AArch64/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/AArch64/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/AArch64/InstPrinter/Makefile create mode 100644 lib/Target/AArch64/LLVMBuild.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp create mode 100644 
lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h create mode 100644 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/Makefile create mode 100644 lib/Target/AArch64/Makefile create mode 100644 lib/Target/AArch64/README.txt create mode 100644 lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp create mode 100644 lib/Target/AArch64/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/AArch64/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/AArch64/TargetInfo/Makefile (limited to 'lib') diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index e5b749e..c4c8e6e 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -300,7 +300,9 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) { void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { switch (expr->getKind()) { - case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!"); + case MCExpr::Target: + cast(expr)->fixELFSymbolsInTLSFixups(getAssembler()); + break; case MCExpr::Constant: break; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 1f5548f..ae0abde 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -256,6 +256,25 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { TTypeEncoding = (CMModel == CodeModel::Small) ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } + } else if (T.getArch() == Triple::aarch64) { + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + // The small model guarantees static code/data size < 4GB, but not where it + // will be in memory. Most of these could end up >2GB away so even a signed + // pc-relative 32-bit address is insufficient, theoretically. 
+ if (RelocM == Reloc::PIC_) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; + FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + FDEEncoding = dwarf::DW_EH_PE_udata4; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } } else if (T.getArch() == Triple::ppc64) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index ad7b189..d2508ac 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -19,6 +19,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case UnknownArch: return "unknown"; + case aarch64: return "aarch64"; case arm: return "arm"; case hexagon: return "hexagon"; case mips: return "mips"; @@ -53,6 +54,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { default: return 0; + case aarch64: return "aarch64"; + case arm: case thumb: return "arm"; @@ -152,6 +155,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return StringSwitch(Name) + .Case("aarch64", aarch64) .Case("arm", arm) .Case("mips", mips) .Case("mipsel", mipsel) @@ -215,6 +219,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("powerpc", Triple::ppc) .Cases("powerpc64", "ppu", Triple::ppc64) .Case("mblaze", Triple::mblaze) + .Case("aarch64", Triple::aarch64) .Cases("arm", "xscale", Triple::arm) // FIXME: It would be good to replace these with explicit names for all the // various suffixes supported. @@ -676,6 +681,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::spir: return 32; + case llvm::Triple::aarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::nvptx64: @@ -704,6 +710,7 @@ Triple Triple::get32BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: + case Triple::aarch64: case Triple::msp430: T.setArch(UnknownArch); break; @@ -755,6 +762,7 @@ Triple Triple::get64BitArchVariant() const { T.setArch(UnknownArch); break; + case Triple::aarch64: case Triple::spir64: case Triple::mips64: case Triple::mips64el: diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h new file mode 100644 index 0000000..622814d --- /dev/null +++ b/lib/Target/AArch64/AArch64.h @@ -0,0 +1,42 @@ +//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AArch64 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_H +#define LLVM_TARGET_AARCH64_H + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AArch64AsmPrinter; +class FunctionPass; +class AArch64TargetMachine; +class MachineInstr; +class MCInst; + +FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel); + +FunctionPass *createAArch64ConstantIslandPass(); + +FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); + +void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AArch64AsmPrinter &AP); + + +} + +#endif diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td new file mode 100644 index 0000000..750fec7 --- /dev/null +++ b/lib/Target/AArch64/AArch64.td @@ -0,0 +1,68 @@ +//===- AArch64.td - Describe the AArch64 Target Machine ---------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the AArch64 target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. +// + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors +// + +include "AArch64Schedule.td" + +def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" + +include "AArch64CallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64InstrInfo.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// + +def A64InstPrinter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyWriters = [A64InstPrinter]; +} diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp new file mode 100644 index 0000000..63cc88f --- /dev/null +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -0,0 +1,361 @@ +//===-- 
AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format AArch64 assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64AsmPrinter.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/DebugInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MachineLocation +AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // See emitFrameIndexDebugValue in InstrInfo for where this instruction is + // expected to be created. + assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() + && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); + return MachineLocation(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); +} + +/// Try to print a floating-point register as if it belonged to a specified +/// register-class. For example the inline asm operand modifier "b" requires its +/// argument to be printed as "bN". +static bool printModifiedFPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + if (!MO.isReg()) + return true; + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + return true; +} + +/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR +/// with the obvious type and an immediate 0 as either wzr or xzr. +static bool printModifiedGPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x'; + + if (MO.isImm() && MO.getImm() == 0) { + O << Prefix << "zr"; + return false; + } else if (MO.isReg()) { + if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) { + O << (Prefix == 'x' ? "sp" : "wsp"); + return false; + } + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + } + + return true; +} + +bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O) { + StringRef Name; + StringRef Modifier; + switch (MO.getType()) { + default: llvm_unreachable("Unexpected operand for symbolic address constraint"); + case MachineOperand::MO_GlobalAddress: + Name = Mang->getSymbol(MO.getGlobal())->getName(); + + // Global variables may be accessed either via a GOT or in various fun and + // interesting TLS-model specific ways. Set the prefix modifier as + // appropriate here. 
+ if (const GlobalVariable *GV = dyn_cast(MO.getGlobal())) { + Reloc::Model RelocM = TM.getRelocationModel(); + if (GV->isThreadLocal()) { + switch (TM.getTLSModel(GV)) { + case TLSModel::GeneralDynamic: + Modifier = "tlsdesc"; + break; + case TLSModel::LocalDynamic: + Modifier = "dtprel"; + break; + case TLSModel::InitialExec: + Modifier = "gottprel"; + break; + case TLSModel::LocalExec: + Modifier = "tprel"; + break; + } + } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + Modifier = "got"; + } + } + break; + case MachineOperand::MO_BlockAddress: + Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; + case MachineOperand::MO_ExternalSymbol: + Name = MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + Name = GetCPISymbol(MO.getIndex())->getName(); + break; + } + + // Some instructions (notably ADRP) don't take the # prefix for + // immediates. Only print it if asked to. + if (PrintImmediatePrefix) + O << '#'; + + // Only need the joining "_" if both the prefix and the suffix are + // non-null. This little block simply takes care of the four possible + // combinations involved there. + if (Modifier == "" && Suffix == "") + O << Name; + else if (Modifier == "" && Suffix != "") + O << ":" << Suffix << ':' << Name; + else if (Modifier != "" && Suffix == "") + O << ":" << Modifier << ':' << Name; + else + O << ":" << Modifier << '_' << Suffix << ':' << Name; + + return false; +} + +bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!ExtraCode || !ExtraCode[0]) { + // There's actually no operand modifier, which leads to a slightly eclectic + // set of behaviour which we have to handle here. + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for inline assembly"); + case MachineOperand::MO_Register: + // GCC prints the unmodified operand of a 'w' constraint as the vector + // register. Technically, we could allocate the argument as a VPR128, but + // that leads to extremely dodgy copies being generated to get the data + // there. + if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) + O << AArch64InstPrinter::getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + O << '#' << MO.getImm(); + break; + case MachineOperand::MO_FPImmediate: + assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); + O << "#0.0"; + break; + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + return printSymbolicAddress(MO, false, "", O); + } + return false; + } + + // We have a real modifier to handle. + switch(ExtraCode[0]) { + default: + // See if this is a generic operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); + case 'c': // Don't print "#" before an immediate operand. + if (!MI->getOperand(OpNum).isImm()) + return true; + O << MI->getOperand(OpNum).getImm(); + return false; + case 'w': + // Output 32-bit general register operand, constant zero as wzr, or stack + // pointer as wsp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR32RegClass, O); + case 'x': + // Output 64-bit general register operand, constant zero as xzr, or stack + // pointer as sp. Ignored when used with other operand types. + return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR64RegClass, O); + case 'H': + // Output higher numbered of a 64-bit general register pair + case 'Q': + // Output least significant register of a 64-bit general register pair + case 'R': + // Output most significant register of a 64-bit general register pair + + // FIXME note: these three operand modifiers will require, to some extent, + // adding a paired GPR64 register class. Initial investigation suggests that + // assertions are hit unless it has a type and is made legal for that type + // in ISelLowering. After that step is made, the number of modifications + // needed explodes (operation legality, calling conventions, stores, reg + // copies ...). + llvm_unreachable("FIXME: Unimplemented register pairs"); + case 'b': + // Output 8-bit FP/SIMD scalar register operand, prefixed with b. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR8RegClass, O); + case 'h': + // Output 16-bit FP/SIMD scalar register operand, prefixed with h. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR16RegClass, O); + case 's': + // Output 32-bit FP/SIMD scalar register operand, prefixed with s. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR32RegClass, O); + case 'd': + // Output 64-bit FP/SIMD scalar register operand, prefixed with d. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR64RegClass, O); + case 'q': + // Output 128-bit FP/SIMD scalar register operand, prefixed with q. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR128RegClass, O); + case 'A': + // Output symbolic address with appropriate relocation modifier (also + // suitable for ADRP). + return printSymbolicAddress(MI->getOperand(OpNum), false, "", O); + case 'L': + // Output bits 11:0 of symbolic address with appropriate :lo12: relocation + // modifier. + return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O); + case 'G': + // Output bits 23:12 of symbolic address with appropriate :hi12: relocation + // modifier (currently only for TLS local exec). + return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O); + } + + +} + +bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + // Currently both the memory constraints (m and Q) behave the same and amount + // to the address as a single register. In future, we may allow "m" to provide + // both a base and an offset. + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline assembly memory operand"); + O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']'; + return false; +} + +void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg()); + OS << '+' << MI->getOperand(1).getImm(); + OS << ']'; + OS << "+" << MI->getOperand(NOps - 2).getImm(); +} + + +#include "AArch64GenMCPseudoLowering.inc" + +void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; + + switch (MI->getOpcode()) { + case AArch64::CONSTPOOL_ENTRY: { + unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); + unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); + + OutStreamer.EmitLabel(GetCPISymbol(LabelId)); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; + if (MCPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + else + EmitGlobalConstant(MCPE.Val.ConstVal); + + return; + } + case AArch64::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + } + + MCInst TmpInst; + LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); +} + +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + TD->getPointerSize(0), 0); + } + Stubs.clear(); + } + } +} + +bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + MCP = MF.getConstantPool(); + return AsmPrinter::runOnMachineFunction(MF); +} + +// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmPrinter() { + RegisterAsmPrinter X(TheAArch64Target); +} + diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h new file mode 100644 index 0000000..492be66 --- /dev/null +++ b/lib/Target/AArch64/AArch64AsmPrinter.h @@ -0,0 +1,85 @@ +// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64ASMPRINTER_H +#define LLVM_AARCH64ASMPRINTER_H + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { + + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when printing asm code for different targets. 
+ const AArch64Subtarget *Subtarget; + const MachineConstantPool *MCP; + + // emitPseudoExpansionLowering - tblgen'erated. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + public: + explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget(); + } + + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; + + MCOperand lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const; + + void EmitInstruction(const MachineInstr *MI); + void EmitEndOfAsmFile(Module &M); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + /// printSymbolicAddress - Given some kind of reasonably bare symbolic + /// reference, print out the appropriate asm string to represent it. If + /// appropriate, a relocation-specifier will be produced, composed of a + /// general class derived from the MO parameter and an instruction-specific + /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is + /// given. + bool printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + + virtual const char *getPassName() const { + return "AArch64 Assembly Printer"; + } + + /// A no-op on AArch64 because we emit our constant pool entries inline with + /// the function. + virtual void EmitConstantPool() {} + + virtual bool runOnMachineFunction(MachineFunction &MF); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td new file mode 100644 index 0000000..b880d83 --- /dev/null +++ b/lib/Target/AArch64/AArch64CallingConv.td @@ -0,0 +1,196 @@ +//==-- AArch64CallingConv.td - Calling Conventions for AArch64 ----*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for the AArch64 architecture. +//===----------------------------------------------------------------------===// + + +// The AArch64 Procedure Call Standard is unfortunately specified at a slightly +// higher level of abstraction than LLVM's target interface presents. In +// particular, it refers (like other ABIs, in fact) directly to +// structs. However, generic LLVM code takes the liberty of lowering structure +// arguments to the component fields before we see them. +// +// As a result, the obvious direct map from LLVM IR to PCS concepts can't be +// implemented, so the goals of this calling convention are, in decreasing +// priority order: +// 1. Expose *some* way to express the concepts required to implement the +// generic PCS from a front-end. +// 2. Provide a sane ABI for pure LLVM. +// 3. Follow the generic PCS as closely as is naturally possible. +// +// The suggested front-end implementation of PCS features is: +// * Integer, float and vector arguments of all sizes which end up in +// registers are passed and returned via the natural LLVM type.
+// * Structure arguments with size <= 16 bytes are passed and returned in +// registers as similar integer or composite types. For example: +// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed). +// * HFAs in registers follow rules similar to small structs: appropriate +// composite types. +// * Structure arguments with size > 16 bytes are passed via a pointer, +// handled completely by the front-end. +// * Structure return values > 16 bytes via an sret pointer argument. +// * Other stack-based arguments (not large structs) are passed using byval +// pointers. Padding arguments are added beforehand to guarantee a large +// struct doesn't later use integer registers. +// +// N.b. this means that it is the front-end's responsibility (if it cares about +// PCS compliance) to check whether enough registers are available for an +// argument when deciding how to pass it. + +class CCIfAlign: + CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; + +def CC_A64_APCS : CallingConv<[ + // SRet is an LLVM-specific concept, so it takes precedence over general ABI + // concerns. However, this rule will be used by C/C++ frontends to implement + // structure return. + CCIfSRet>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // Canonicalise the various types that live in different floating-point + // registers. This makes sense because the PCS does not distinguish Short + // Vectors and Floating-point types. + CCIfType<[v2i8], CCBitConvertToType>, + CCIfType<[v4i8, v2i16], CCBitConvertToType>, + CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCBitConvertToType>, + + // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision + // Floating-point or Short Vector Type and the NSRN is less than 8, then the + // argument is allocated to the least significant bits of register + // v[NSRN]. The NSRN is incremented by one. The argument has now been + // allocated." + CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated + // SIMD and Floating-point registers (NSRN - number of elements < 8), then the + // argument is allocated to SIMD and Floating-point registers (with one + // register per element of the HFA). The NSRN is incremented by the number of + // registers used. The argument has now been allocated." + // + // N.b. As above, this rule is the responsibility of the front-end. + + // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of + // the argument is rounded up to the nearest multiple of 8 bytes." + // + // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short + // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the Argument's type." + // + // It is expected that these will be satisfied by adding dummy arguments to + // the prototype. + + // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point + // type then the size of the argument is set to 8 bytes. The effect is as if + // the argument had been copied to the least significant bits of a 64-bit + // register and the remaining bits filled with unspecified values." 
+ CCIfType<[f16, f32], CCPromoteToType>, + + // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad- + // precision Floating-point or Short Vector Type, then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." + CCIfType<[f64], CCAssignToStack<8, 8>>, + CCIfType<[f128], CCAssignToStack<16, 16>>, + + // PCS: "C.7: If the argument is an Integral Type, the size of the argument is + // less than or equal to 8 bytes and the NGRN is less than 8, the argument is + // copied to the least significant bits of x[NGRN]. The NGRN is incremented by + // one. The argument has now been allocated." + + // First we implement C.8 and C.9 (128-bit types get even registers). i128 is + // represented as two i64s, the first one being split. If we delayed this + // operation C.8 would never be reached. + CCIfType<[i64], + CCIfSplit>>, + + // Note: the promotion also implements C.14. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // And now the real implementation of C.7 + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + + // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded + // up to the next even number." + // + // "C.9: If the argument is an Integral Type, the size of the argument is + // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] + // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the + // memory representation of the argument. The NGRN is incremented by two. The + // argument has now been allocated." + // + // Subtlety here: what if alignment is 16 but it is not an integral type? All + // floating-point types have been allocated already, which leaves composite + // types: this is why a front-end may need to produce i128 for a struct <= 16 + // bytes. + + // PCS: "C.10: If the argument is a Composite Type and the size in double-words + // of the argument is not more than 8 minus NGRN, then the argument is copied + // into consecutive general-purpose registers, starting at x[NGRN]. The + // argument is passed as though it had been loaded into the registers from a + // double-word aligned address with an appropriate sequence of LDR + // instructions loading consecutive registers from memory (the contents of any + // unused parts of the registers are unspecified by this standard). The NGRN + // is incremented by the number of registers used. The argument has now been + // allocated." + // + // Another one that's the responsibility of the front-end (sigh). + + // PCS: "C.11: The NGRN is set to 8." + CCCustom<"CC_AArch64NoMoreRegs">, + + // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the argument's type." + // + // PCS: "C.13: If the argument is a composite type then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." + // + // Note that the effect of this corresponds to a memcpy rather than register + // stores so that the struct ends up correctly addressable at the adjusted + // NSAA. + + // PCS: "C.14: If the size of the argument is less than 8 bytes then the size + // of the argument is set to 8 bytes. The effect is as if the argument was + // copied to the least significant bits of a 64-bit register and the remaining + // bits filled with unspecified values." + // + // Integer types were widened above.
Floating-point and composite types have + // already been allocated completely. Nothing to do. + + // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA + // is incremented by the size of the argument. The argument has now been + // allocated." + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64], CCAssignToStack<8, 8>> + +]>; + +// According to the PCS, X19-X30 are callee-saved; however, only the low 64 bits +// of vector registers (8-15) are callee-saved. The order here is picked up +// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of +// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at +// [sp-16], ... +def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19), + (sequence "D%u", 15, 8))>; + + +// TLS descriptor calls are extremely restricted in their changes, to allow +// optimisations in the (hopefully) more common fast path where no real action +// is needed. They actually have to preserve all registers, except for the +// unavoidable X30 and the return register X0. +def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1), + (sequence "Q%u", 31, 0))>; diff --git a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp new file mode 100644 index 0000000..7734866 --- /dev/null +++ b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp @@ -0,0 +1,1420 @@ +//===-- AArch64ConstantIslandPass.cpp - AArch64 constant islands ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that splits the constant pool up into 'islands' +// which are scattered throughout the function. This is required due to the +// limited pc-relative displacements that AArch64 has. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-cp-islands" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64MachineFunctionInfo.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include +using namespace llvm; + +STATISTIC(NumCPEs, "Number of constpool entries"); +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); + +// FIXME: This option should be removed once it has received sufficient testing. +static cl::opt +AlignConstantIslands("aarch64-align-constant-islands", cl::Hidden, + cl::init(true), cl::desc("Align constant islands in code")); + +/// Return the worst case padding that could result from unknown offset bits. +/// This does not include alignment padding caused by known offset bits.
+/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +namespace { + /// Due to limited PC-relative displacements, AArch64 requires constant pool + /// entries to be scattered among the instructions inside a function. To do + /// this, it completely ignores the normal LLVM constant pool; instead, it + /// places constants wherever it feels like with special instructions. + /// + /// The terminology used in this pass includes: + /// Islands - Clumps of constants placed in the function. + /// Water - Potential places where an island could be formed. + /// CPE - A constant pool entry that has been placed somewhere, which + /// tracks a list of users. + class AArch64ConstantIslands : public MachineFunctionPass { + /// Information about the offset and size of a single basic block. + struct BasicBlockInfo { + /// Distance from the beginning of the function to the beginning of this + /// basic block. + /// + /// Offsets are computed assuming worst case padding before an aligned + /// block. This means that subtracting basic block offsets always gives a + /// conservative estimate of the real distance which may be smaller. + /// + /// Because worst case padding is used, the computed offset of an aligned + /// block may not actually be aligned. + unsigned Offset; + + /// Size of the basic block in bytes. If the block contains inline + /// assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// The number of low bits in Offset that are known to be exact. The + /// remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// When non-zero, the block contains instructions (inline asm) of unknown + /// size. The real size may be smaller than Size bytes by a multiple of 1 + /// << Unalign. + uint8_t Unalign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + unsigned Bits = Unalign ? Unalign : KnownBits; + // If the block size isn't a multiple of the known bits, assume the + // worst case padding. + if (Size & ((1u << Bits) - 1)) + Bits = CountTrailingZeros_32(Size); + return Bits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + if (!LogAlign) + return PO; + // Add alignment padding from the terminator. + return PO + UnknownPadding(LogAlign, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. + unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(LogAlign, internalKnownBits()); + } + }; + + std::vector BBInfo; + + /// A sorted list of basic blocks where islands could be placed (i.e. 
blocks + /// that don't fall through to the following block, due to a return, + /// unreachable, or unconditional branch). + std::vector WaterList; + + /// The subset of WaterList that was created since the previous iteration by + /// inserting unconditional branches. + SmallSet NewWaterList; + + typedef std::vector::iterator water_iterator; + + /// One user of a constant pool, keeping the machine instruction pointer, + /// the constant pool being referenced, and the number of bits used by the + /// instruction for displacement. The HighWaterMark records the highest + /// basic block where a new CPEntry can be placed. To ensure this pass + /// terminates, the CP entries are initially placed at the end of the + /// function and then move monotonically to lower addresses. The exception + /// to this rule is when the current CP entry for a particular CPUser is out + /// of range, but there is another CP entry for the same constant value in + /// range. We want to use the existing in-range CP entry, but if it later + /// moves out of range, the search for new water should resume where it left + /// off. The HighWaterMark is used to record that point. + struct CPUser { + MachineInstr *MI; + MachineInstr *CPEMI; + MachineBasicBlock *HighWaterMark; + private: + unsigned OffsetBits; + public: + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned offsetbits) + : MI(mi), CPEMI(cpemi), OffsetBits(offsetbits) { + HighWaterMark = CPEMI->getParent(); + } + /// Returns the number of bits used to specify the offset. + unsigned getOffsetBits() const { + return OffsetBits; + } + + /// Returns the maximum positive displacement possible from this CPUser + /// (essentially INT_MAX * 4). + unsigned getMaxPosDisp() const { + return (1 << (OffsetBits - 1)) - 1; + } + }; + + /// Keep track of all of the machine instructions that use various constant + /// pools and their max displacement. + std::vector CPUsers; + + /// One per constant pool entry, keeping the machine instruction pointer, + /// the constpool index, and the number of CPUser's which reference this + /// entry. + struct CPEntry { + MachineInstr *CPEMI; + unsigned CPI; + unsigned RefCount; + CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) + : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} + }; + + /// Keep track of all of the constant pool entry machine instructions. For + /// each original constpool index (i.e. those that existed upon entry to + /// this pass), it keeps a vector of entries. Original elements are cloned + /// as we go along; the clones are put in the vector of the original + /// element, but have distinct CPIs. + std::vector > CPEntries; + + /// One per immediate branch, keeping the machine instruction pointer, + /// conditional or unconditional, the max displacement, and (if IsCond is + /// true) the corresponding inverted branch opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned OffsetBits : 31; + bool IsCond : 1; + ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) + : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} + }; + + /// Keep track of all the immediate branch instructions. 
+ /// + std::vector ImmBranches; + + MachineFunction *MF; + MachineConstantPool *MCP; + const AArch64InstrInfo *TII; + const AArch64Subtarget *STI; + AArch64MachineFunctionInfo *AFI; + public: + static char ID; + AArch64ConstantIslands() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "AArch64 constant island placement pass"; + } + + private: + void doInitialPlacement(std::vector &CPEMIs); + CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); + unsigned getCPELogAlign(const MachineInstr *CPEMI); + void scanFunctionJumpTables(); + void initializeFunctionInfo(const std::vector &CPEMIs); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + int findInRangeCPEntry(CPUser& U, unsigned UserOffset); + bool findAvailableWater(CPUser&U, unsigned UserOffset, + water_iterator &WaterIter); + void createNewWater(unsigned CPUserIndex, unsigned UserOffset, + MachineBasicBlock *&NewMBB); + bool handleConstantPoolUser(unsigned CPUserIndex); + void removeDeadCPEMI(MachineInstr *CPEMI); + bool removeUnusedCPEntries(); + bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned OffsetBits, + bool DoDump = false); + bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, + CPUser &U, unsigned &Growth); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, + unsigned OffsetBits); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + unsigned getUserOffset(CPUser&) const; + void dumpBBs(); + void verify(); + + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, + unsigned BitsAvailable); + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, + const CPUser &U) { + return isOffsetInRange(UserOffset, TrialOffset, U.getOffsetBits()); + } + }; + char AArch64ConstantIslands::ID = 0; +} + +/// check BBOffsets, BBSizes, alignment of islands +void AArch64ConstantIslands::verify() { +#ifndef NDEBUG + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + unsigned MBBId = MBB->getNumber(); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); + } + DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned UserOffset = getUserOffset(U); + // Verify offset using the real max displacement without the safety + // adjustment. 
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getOffsetBits(),
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// Print block size and offset information - debugging
+void AArch64ConstantIslands::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// Returns an instance of the constpool island pass.
+FunctionPass *llvm::createAArch64ConstantIslandPass() {
+ return new AArch64ConstantIslands();
+}
+
+bool AArch64ConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
+
+ DEBUG(dbgs() << "***** AArch64ConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<AArch64MachineFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<AArch64Subtarget>();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
+
+ // The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+ // Remove dead constant pool entries.
+ bool MadeChange = removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= handleConstantPoolUser(i);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+
+ return MadeChange;
+}
+
+/// Perform the initial placement of the constant pool entries. 
To start with, +/// we put them all at the end of the function. +void +AArch64ConstantIslands::doInitialPlacement(std::vector &CPEMIs) { + // Create the basic block to hold the CPE's. + MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). + unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + + // Mark the basic block as required by the const-pool. + // If AlignConstantIslands isn't set, use 4-byte alignment for everything. + BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + + // The function needs to be as aligned as the basic blocks. The linker may + // move functions around based on their alignment. + MF->ensureAlignment(BB->getAlignment()); + + // Order the entries in BB by descending alignment. That ensures correct + // alignment of all entries as long as BB is sufficiently aligned. Keep + // track of the insertion point for each alignment. We are going to bucket + // sort the entries as they are created. + SmallVector InsPoint(MaxAlign + 1, BB->end()); + + // Add all of the constants from the constant pool to the end block, use an + // identity mapping of CPI's to CPE's. + const std::vector &CPs = MCP->getConstants(); + + const DataLayout &TD = *MF->getTarget().getDataLayout(); + for (unsigned i = 0, e = CPs.size(); i != e; ++i) { + unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); + assert(Size >= 4 && "Too small constant pool entry"); + unsigned Align = CPs[i].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid alignment"); + // Verify that all constant pool entries are a multiple of their alignment. + // If not, we would have to pad them out so that instructions stay aligned. + assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!"); + + // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. + unsigned LogAlign = Log2_32(Align); + MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; + MachineInstr *CPEMI = + BuildMI(*BB, InsAt, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY)) + .addImm(i).addConstantPoolIndex(i).addImm(Size); + CPEMIs.push_back(CPEMI); + + // Ensure that future entries with higher alignment get inserted before + // CPEMI. This is bucket sort with iterators. + for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + if (InsPoint[a] == InsAt) + InsPoint[a] = CPEMI; + + // Add a new CPEntry, but no corresponding CPUser yet. + std::vector CPEs; + CPEs.push_back(CPEntry(CPEMI, i)); + CPEntries.push_back(CPEs); + ++NumCPEs; + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " + << Size << ", align = " << Align <<'\n'); + } + DEBUG(BB->dump()); +} + +/// Return true if the specified basic block can fallthrough into the block +/// immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) + return false; + + MachineBasicBlock *NextBB = llvm::next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// Given the constpool index and CONSTPOOL_ENTRY MI, look up the corresponding +/// CPEntry. 
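(Before the lookup helper that follows, an aside: the InsPoint bookkeeping in
doInitialPlacement above is a bucket sort keyed on log2 alignment. A minimal,
self-contained sketch of the same idea -- with a hypothetical Entry type
standing in for the pass's CONSTPOOL_ENTRY instructions -- behaves like this:)

#include <cstdio>
#include <list>
#include <vector>

struct Entry { unsigned LogAlign, Id; }; // hypothetical stand-in for a CPE

int main() {
  // Entries arrive in pool order; lay them out by descending alignment so
  // each is naturally aligned whenever the block start is.
  std::vector<Entry> Pool = {{2, 0}, {4, 1}, {3, 2}, {2, 3}, {4, 4}};
  const unsigned MaxLogAlign = 4;

  std::list<Entry> Block;
  // One insertion point per alignment, all initially at the end of Block.
  std::vector<std::list<Entry>::iterator> InsPoint(MaxLogAlign + 1,
                                                   Block.end());
  for (const Entry &E : Pool) {
    std::list<Entry>::iterator At = InsPoint[E.LogAlign];
    std::list<Entry>::iterator It = Block.insert(At, E);
    // Future entries with higher alignment must go before the one just
    // inserted; same-alignment entries keep their pool order.
    for (unsigned A = E.LogAlign + 1; A <= MaxLogAlign; ++A)
      if (InsPoint[A] == At)
        InsPoint[A] = It;
  }
  for (const Entry &E : Block) // prints ids 1 4 2 0 3: align 4,4,3,2,2
    std::printf("id=%u logalign=%u\n", E.Id, E.LogAlign);
}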
+AArch64ConstantIslands::CPEntry +*AArch64ConstantIslands::findConstPoolEntry(unsigned CPI, + const MachineInstr *CPEMI) { + std::vector &CPEs = CPEntries[CPI]; + // Number of entries per constpool index should be small, just do a + // linear search. + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + if (CPEs[i].CPEMI == CPEMI) + return &CPEs[i]; + } + return NULL; +} + +/// Returns the required alignment of the constant pool entry represented by +/// CPEMI. Alignment is measured in log2(bytes) units. +unsigned AArch64ConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { + assert(CPEMI && CPEMI->getOpcode() == AArch64::CONSTPOOL_ENTRY); + + // Everything is 4-byte aligned unless AlignConstantIslands is set. + if (!AlignConstantIslands) + return 2; + + unsigned CPI = CPEMI->getOperand(1).getIndex(); + assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); + unsigned Align = MCP->getConstants()[CPI].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); + return Log2_32(Align); +} + +/// Do the initial scan of the function, building up information about the sizes +/// of each block, the location of all the water, and finding all of the +/// constant pool users. +void AArch64ConstantIslands:: +initializeFunctionInfo(const std::vector &CPEMIs) { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + computeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + adjustBBOffsetsAfter(MF->begin()); + + // Now go back through the instructions and build up our data structures. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + // If this block doesn't fall through into the next MBB, then this is + // 'water' that a constant pool island could be placed. + if (!BBHasFallthrough(&MBB)) + WaterList.push_back(&MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + int Opc = I->getOpcode(); + if (I->isBranch()) { + bool IsCond = false; + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. + unsigned Bits = 0; + switch (Opc) { + default: + continue; // Ignore other JT branches + case AArch64::TBZxii: + case AArch64::TBZwii: + case AArch64::TBNZxii: + case AArch64::TBNZwii: + IsCond = true; + Bits = 14 + 2; + break; + case AArch64::Bcc: + case AArch64::CBZx: + case AArch64::CBZw: + case AArch64::CBNZx: + case AArch64::CBNZw: + IsCond = true; + Bits = 19 + 2; + break; + case AArch64::Bimm: + Bits = 26 + 2; + break; + } + + // Record this immediate branch. + ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); + } + + if (Opc == AArch64::CONSTPOOL_ENTRY) + continue; + + // Scan the instructions for constant pool operands. + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) + if (I->getOperand(op).isCPI()) { + // We found one. 
The addressing mode tells us the max displacement + // from the PC that this instruction permits. + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. + unsigned Bits = 0; + + switch (Opc) { + default: + llvm_unreachable("Unknown addressing mode for CP reference!"); + + case AArch64::LDRw_lit: + case AArch64::LDRx_lit: + case AArch64::LDRs_lit: + case AArch64::LDRd_lit: + case AArch64::LDRq_lit: + case AArch64::LDRSWx_lit: + case AArch64::PRFM_lit: + Bits = 19 + 2; + } + + // Remember that this is a user of a CP entry. + unsigned CPI = I->getOperand(op).getIndex(); + MachineInstr *CPEMI = CPEMIs[CPI]; + CPUsers.push_back(CPUser(I, CPEMI, Bits)); + + // Increment corresponding CPEntry reference count. + CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Cannot find a corresponding CPEntry!"); + CPE->RefCount++; + + // Instructions can only use one CP entry, don't bother scanning the + // rest of the operands. + break; + } + } + } +} + +/// Compute the size and some alignment information for MBB. This function +/// updates BBInfo directly. +void AArch64ConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->getInstSizeInBytes(*I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = 2; + } +} + +/// Return the current offset of the specified machine instruction from the +/// start of the function. This offset changes as stuff is moved around inside +/// the function. +unsigned AArch64ConstantIslands::getOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BBInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + return Offset; +} + +/// Little predicate function to sort the WaterList by MBB ID. +static bool CompareMBBNumbers(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) { + return LHS->getNumber() < RHS->getNumber(); +} + +/// When a block is newly inserted into the machine function, it upsets all of +/// the block numbers. Renumber the blocks and update the arrays that parallel +/// this numbering. +void AArch64ConstantIslands:: +updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { + // Renumber the MBB's to keep them consecutive. + NewBB->getParent()->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Next, update WaterList. Specifically, we need to add NewMBB as having + // available water after it. + water_iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, + CompareMBBNumbers); + WaterList.insert(IP, NewBB); +} + + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. 
Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock * +AArch64ConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF->insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); + ++NumSplit; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + NewBB->transferSuccessors(OrigBB); + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + // This is almost the same as updateForInsertedWaterBlock, except that + // the Water goes after OrigBB, not NewBB. + MF->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Next, update WaterList. Specifically, we need to add OrigMBB as having + // available water after it (but not if it's already there, which happens + // when splitting before a conditional branch that is followed by an + // unconditional branch - in that case we want to insert NewBB). + water_iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, + CompareMBBNumbers); + MachineBasicBlock* WaterBB = *IP; + if (WaterBB == OrigBB) + WaterList.insert(llvm::next(IP), NewBB); + else + WaterList.insert(IP, OrigBB); + NewWaterList.insert(OrigBB); + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + computeBlockSize(OrigBB); + + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + computeBlockSize(NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBBOffsetsAfter(OrigBB); + + return NewBB; +} + +/// Compute the offset of U.MI as seen by the hardware displacement computation. +unsigned AArch64ConstantIslands::getUserOffset(CPUser &U) const { + return getOffsetOf(U.MI); +} + +/// Checks whether UserOffset (the location of a constant pool reference) is +/// within OffsetBits of TrialOffset (a proposed location of a constant pool +/// entry). +bool AArch64ConstantIslands::isOffsetInRange(unsigned UserOffset, + unsigned TrialOffset, + unsigned OffsetBits) { + return isIntN(OffsetBits, static_cast(TrialOffset) - UserOffset); +} + +/// Returns true if a CPE placed after the specified Water (a basic block) will +/// be in range for the specific MI. +/// +/// Compute how much the function will grow by inserting a CPE after Water. 
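+/// For example, an 8-byte CPE placed at offset 0x104 in front of a block
+/// starting at 0x110 ends at 0x10c and hides entirely in the existing
+/// alignment padding, so Growth is 0; were the next block to start at 0x108
+/// instead, the function would grow by the 4-byte overrun plus whatever
+/// padding realigning that block requires.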
+bool AArch64ConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// Returns true if the distance between specific MI and specific ConstPool
+/// entry instruction can fit in MI's displacement field.
+bool AArch64ConstantIslands::isCPEntryInRange(MachineInstr *MI,
+ unsigned UserOffset,
+ MachineInstr *CPEMI,
+ unsigned OffsetBits, bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " bits available=" << OffsetBits
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, OffsetBits);
+}
+
+#ifndef NDEBUG
+/// Return true if the specified basic block's only predecessor unconditionally
+/// branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == AArch64::Bimm)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void AArch64ConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for (unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. 
This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; + } +} + +/// Find the constant pool entry with index CPI and instruction CPEMI, and +/// decrement its refcount. If the refcount becomes 0 remove the entry and +/// instruction. Returns true if we removed the entry, false if we didn't. +bool AArch64ConstantIslands::decrementCPEReferenceCount(unsigned CPI, + MachineInstr *CPEMI) { + // Find the old entry. Eliminate it if it is no longer used. + CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Unexpected!"); + if (--CPE->RefCount == 0) { + removeDeadCPEMI(CPEMI); + CPE->CPEMI = NULL; + --NumCPEs; + return true; + } + return false; +} + +/// See if the currently referenced CPE is in range; if not, see if an in-range +/// clone of the CPE is in range, and if so, change the data structures so the +/// user references the clone. Returns: +/// 0 = no existing entry found +/// 1 = entry found, and there were no code insertions or deletions +/// 2 = entry found, and there were code insertions or deletions +int AArch64ConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) +{ + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + + // Check to see if the CPE is already in-range. + if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getOffsetBits(), true)) { + DEBUG(dbgs() << "In range\n"); + return 1; + } + + // No. Look for previously created clones of the CPE that are in range. + unsigned CPI = CPEMI->getOperand(1).getIndex(); + std::vector &CPEs = CPEntries[CPI]; + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + // We already tried this one + if (CPEs[i].CPEMI == CPEMI) + continue; + // Removing CPEs can leave empty entries, skip + if (CPEs[i].CPEMI == NULL) + continue; + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getOffsetBits())) { + DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" + << CPEs[i].CPI << "\n"); + // Point the CPUser node to the replacement + U.CPEMI = CPEs[i].CPEMI; + // Change the CPI in the instruction operand to refer to the clone. + for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) + if (UserMI->getOperand(j).isCPI()) { + UserMI->getOperand(j).setIndex(CPEs[i].CPI); + break; + } + // Adjust the refcount of the clone... + CPEs[i].RefCount++; + // ...and the original. If we didn't remove the old entry, none of the + // addresses changed, so we don't need another pass. + return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; + } + } + return 0; +} + +/// Look for an existing entry in the WaterList in which we can place the CPE +/// referenced from U so it's within range of U's MI. Returns true if found, +/// false if not. If it returns true, WaterIter is set to the WaterList +/// entry. To ensure that this pass terminates, the CPE location for a +/// particular CPUser is only allowed to move to a lower address, so search +/// backward from the end of the list and prefer the first water that is in +/// range. 
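(Before the definition below, an aside: stripped of the offset bookkeeping,
the loop is "walk the water list backward, remember the candidate needing the
least growth, stop early on a perfect fit". A self-contained sketch with plain
integers -- the Growth/InRange tables are hypothetical, not the pass's data:)

#include <climits>
#include <cstdio>
#include <vector>

// Growth[i]: bytes the function grows if the CPE goes after water block i;
// InRange[i]: whether water block i is reachable from the user at all.
static bool findBestWater(const std::vector<unsigned> &Growth,
                          const std::vector<bool> &InRange,
                          unsigned &BestIdx) {
  unsigned BestGrowth = UINT_MAX;
  for (unsigned i = Growth.size(); i-- != 0;) { // backward, as in the pass
    if (!InRange[i] || Growth[i] >= BestGrowth)
      continue;
    BestGrowth = Growth[i];
    BestIdx = i;
    if (BestGrowth == 0) // perfect fit: CPE hides in existing padding
      return true;
  }
  return BestGrowth != UINT_MAX;
}

int main() {
  std::vector<unsigned> Growth(4);
  Growth[0] = 0; Growth[1] = 12; Growth[2] = 4; Growth[3] = 8;
  std::vector<bool> InRange(4, true);
  InRange[0] = false; // too far away: its zero growth must not win
  unsigned Idx;
  if (findBestWater(Growth, InRange, Idx)) // picks Idx == 2, growth 4
    std::printf("best water: %u (growth %u)\n", Idx, Growth[Idx]);
}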
+bool AArch64ConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, + water_iterator &WaterIter) { + if (WaterList.empty()) + return false; + + unsigned BestGrowth = ~0u; + for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + --IP) { + MachineBasicBlock* WaterBB = *IP; + // Check if water is in range and is either at a lower address than the + // current "high water mark" or a new water block that was created since + // the previous iteration by inserting an unconditional branch. In the + // latter case, we want to allow resetting the high water mark back to + // this new water since we haven't seen it before. Inserting branches + // should be relatively uncommon and when it does happen, we want to be + // sure to take advantage of it for all the CPEs near that block, so that + // we don't insert more branches than necessary. + unsigned Growth; + if (isWaterInRange(UserOffset, WaterBB, U, Growth) && + (WaterBB->getNumber() < U.HighWaterMark->getNumber() || + NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + // This is the least amount of required padding seen so far. + BestGrowth = Growth; + WaterIter = IP; + DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + << " Growth=" << Growth << '\n'); + + // Keep looking unless it is perfect. + if (BestGrowth == 0) + return true; + } + if (IP == B) + break; + } + return BestGrowth != ~0u; +} + +/// No existing WaterList entry will work for CPUsers[CPUserIndex], so create a +/// place to put the CPE. The end of the block is used if in range, and the +/// conditional branch munged so control flow is correct. Otherwise the block +/// is split to create a hole with an unconditional branch around it. In either +/// case NewMBB is set to a block following which the new island can be inserted +/// (the WaterList is not adjusted). +void AArch64ConstantIslands::createNewWater(unsigned CPUserIndex, + unsigned UserOffset, + MachineBasicBlock *&NewMBB) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + unsigned CPELogAlign = getCPELogAlign(CPEMI); + MachineBasicBlock *UserMBB = UserMI->getParent(); + const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; + + // If the block does not end in an unconditional branch already, and if the + // end of the block is within range, make new water there. + if (BBHasFallthrough(UserMBB)) { + // Size of branch to insert. + unsigned InstrSize = 4; + // Compute the offset where the CPE will begin. + unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + InstrSize; + + if (isOffsetInRange(UserOffset, CPEOffset, U)) { + DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() + << format(", expected CPE offset %#x\n", CPEOffset)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. Record + // it for branch lengthening; this new branch will not get out of range, + // but if the preceding conditional branch is out of range, the targets + // will be exchanged, and the altered branch may be out of range, so the + // machinery has to know about it. + BuildMI(UserMBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewMBB); + + // 26 bits written down, specifying a multiple of 4. + unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&UserMBB->back(), OffsetBits, false)); + BBInfo[UserMBB->getNumber()].Size += InstrSize; + adjustBBOffsetsAfter(UserMBB); + return; + } + } + + // What a big block. 
Find a place within the block to split it. We make a
+ // first guess, then walk through the instructions between the one currently
+ // being looked at and the possible insertion point, and make sure any other
+ // instructions that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxPosDisp() - UPad;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // The 4 in the following is for the unconditional branch we'll be inserting.
+ // Alignment of the island is handled inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ for (unsigned Offset = UserOffset+TII->getInstSizeInBytes(*UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->getInstSizeInBytes(*MI),
+ MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+
+ --MI;
+ NewMBB = splitBlockBeforeInstr(MI);
+}
+
+/// Analyze the specified user, checking to see if it is out-of-range. If so,
+/// pick up the constant pool value and move it some place in-range. Return
+/// true if we changed any addresses, false otherwise.
+bool AArch64ConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range. 
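+ // (0 = nothing usable was found; 1 = an entry was found without moving
+ // any code; 2 = switching to a clone also deleted the old entry, so
+ // addresses changed.)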
+ int result = findInRangeCPEntry(U, UserOffset); + if (result==1) return false; + else if (result==2) return true; + + // No existing clone of this CPE is within range. + // We will be generating a new clone. Get a UID for it. + unsigned ID = AFI->createPICLabelUId(); + + // Look for water where we can place this CPE. + MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); + MachineBasicBlock *NewMBB; + water_iterator IP; + if (findAvailableWater(U, UserOffset, IP)) { + DEBUG(dbgs() << "Found water in range\n"); + MachineBasicBlock *WaterBB = *IP; + + // If the original WaterList entry was "new water" on this iteration, + // propagate that to the new island. This is just keeping NewWaterList + // updated to match the WaterList, which will be updated below. + if (NewWaterList.count(WaterBB)) { + NewWaterList.erase(WaterBB); + NewWaterList.insert(NewIsland); + } + // The new CPE goes before the following block (NewMBB). + NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); + + } else { + // No water found. + DEBUG(dbgs() << "No water found\n"); + createNewWater(CPUserIndex, UserOffset, NewMBB); + + // splitBlockBeforeInstr adds to WaterList, which is important when it is + // called while handling branches so that the water will be seen on the + // next iteration for constant pools, but in this context, we don't want + // it. Check for this so it will be removed from the WaterList. + // Also remove any entry from NewWaterList. + MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); + IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); + if (IP != WaterList.end()) + NewWaterList.erase(WaterBB); + + // We are adding new water. Update NewWaterList. + NewWaterList.insert(NewIsland); + } + + // Remove the original WaterList entry; we want subsequent insertions in + // this vicinity to go after the one we're about to insert. This + // considerably reduces the number of times we have to move the same CPE + // more than once and is also important to ensure the algorithm terminates. + if (IP != WaterList.end()) + WaterList.erase(IP); + + // Okay, we know we can put an island before NewMBB now, do it! + MF->insert(NewMBB, NewIsland); + + // Update internal data structures to account for the newly inserted MBB. + updateForInsertedWaterBlock(NewIsland); + + // Decrement the old entry, and remove it if refcount becomes 0. + decrementCPEReferenceCount(CPI, CPEMI); + + // Now that we have an island to add the CPE to, clone the original CPE and + // add it to the island. + U.HighWaterMark = NewIsland; + U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY)) + .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); + ++NumCPEs; + + // Mark the basic block as aligned as required by the const-pool entry. + NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + + // Increase the size of the island block to account for the new entry. + BBInfo[NewIsland->getNumber()].Size += Size; + adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); + + // Finally, change the CPI in the instruction operand to be ID. + for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) + if (UserMI->getOperand(i).isCPI()) { + UserMI->getOperand(i).setIndex(ID); + break; + } + + DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI + << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); + + return true; +} + +/// Remove a dead constant pool entry instruction. 
Update sizes and offsets of
+/// impacted basic blocks.
+void AArch64ConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// Remove constant pool entries whose refcounts are zero.
+bool AArch64ConstantIslands::removeUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ removeDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// Returns true if the distance between specific MI and specific BB can fit in
+/// MI's displacement field.
+bool AArch64ConstantIslands::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned OffsetBits) {
+ int64_t BrOffset = getOffsetOf(MI);
+ int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " bits available=" << OffsetBits
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ return isIntN(OffsetBits, DestOffset - BrOffset);
+}
+
+/// Fix up an immediate branch whose destination is too far away to fit in its
+/// displacement field.
+bool AArch64ConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isMBB()) {
+ DestBB = MI->getOperand(i).getMBB();
+ break;
+ }
+ }
+ assert(DestBB && "Branch with no destination BB?");
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.OffsetBits))
+ return false;
+
+ assert(Br.IsCond && "Only conditional branches should need fixup");
+ return fixupConditionalBr(Br);
+}
+
+/// Fix up a conditional branch whose destination is too far away to fit in its
+/// displacement field. It is converted to an inverse conditional branch + an
+/// unconditional branch to the destination.
+bool
+AArch64ConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned CondBrMBBOperand = 0;
+
+ // The general idea is to add an unconditional branch to the destination and
+ // invert the conditional branch to jump over it. Complications occur around
+ // fallthrough and unreachable ends to the block.
+ // b.lt L1
+ // =>
+ // b.ge L2
+ // b L1
+ // L2:
+
+ // First we invert the conditional branch, by creating a replacement if
+ // necessary. 
This if statement contains all the special handling of different + // branch types. + if (MI->getOpcode() == AArch64::Bcc) { + // The basic block is operand number 1 for Bcc + CondBrMBBOperand = 1; + + A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); + CC = A64InvertCondCode(CC); + MI->getOperand(0).setImm(CC); + } else { + MachineInstrBuilder InvertedMI; + int InvertedOpcode; + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown branch type"); + case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; + case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; + case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; + case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; + case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; + case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; + case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; + case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; + } + + InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); + for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { + InvertedMI.addOperand(MI->getOperand(i)); + if (MI->getOperand(i).isMBB()) + CondBrMBBOperand = i; + } + + MI->eraseFromParent(); + MI = Br.MI = InvertedMI; + } + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through + // block. Otherwise, split the MBB before the next instruction. + MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + ++NumCBrFixed; + if (BMI != MI) { + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == AArch64::Bimm) { + // Last MI in the BB is an unconditional branch. We can swap destinations: + // b.eq L1 (temporarily b.ne L1 after first change) + // b L2 + // => + // b.ne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (isBBInRange(MI, NewDest, Br.OffsetBits)) { + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " + << *BMI); + MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(CondBrMBBOperand).setMBB(NewDest); + return true; + } + } + } + + if (NeedSplit) { + MachineBasicBlock::iterator MBBI = MI; ++MBBI; + splitBlockBeforeInstr(MBBI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->getInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size -= delta; + MBB->back().eraseFromParent(); + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below + } + + // After splitting and removing the unconditional branch from the original BB, + // the structure is now: + // oldbb: + // [things] + // b.invertedCC L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + // + // We now have to change the conditional branch to point to splitbb and add an + // unconditional branch after it to L1, giving the final structure: + // oldbb: + // [things] + // b.invertedCC splitbb + // b L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" + << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() + << " also invert condition and change dest. 
to BB#" + << NextBB->getNumber() << "\n"); + + // Insert a new unconditional branch and fixup the destination of the + // conditional one. Also update the ImmBranch as well as adding a new entry + // for the new branch. + BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) + .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); + MI->getOperand(CondBrMBBOperand).setMBB(NextBB); + + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); + + // 26 bits written down in Bimm, specifying a multiple of 4. + unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); + + adjustBBOffsetsAfter(MBB); + return true; +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp new file mode 100644 index 0000000..2301114 --- /dev/null +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -0,0 +1,644 @@ +//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64InstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, + uint64_t &Initial, + uint64_t &Residual) const { + // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP + // instructions have a 7-bit signed immediate scaled by 8, giving a reach of + // 0x1f8, but stack adjustment should always be a multiple of 16. + if (Total <= 0x1f0) { + Initial = Total; + Residual = 0; + } else { + Initial = 0x1f0; + Residual = Total - Initial; + } +} + +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo(); + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector &Moves = MMI.getFrameMoves(); + bool NeedsFrameMoves = MMI.hasDebugInfo() + || MF.getFunction()->needsUnwindTableEntry(); + + uint64_t NumInitialBytes, NumResidualBytes; + + // Currently we expect the stack to be laid out by + // sub sp, sp, #initial + // stp x29, x30, [sp, #offset] + // ... + // str xxx, [sp, #offset] + // sub sp, sp, #rest (possibly via extra instructions). 
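+ // (With splitSPAdjustments' 0x1f0 boundary above, a 0x230-byte frame, for
+ // example, is allocated as Initial = 0x1f0 followed by Residual = 0x40,
+ // keeping every callee-saved slot within reach of the 7-bit scaled
+ // LDP/STP immediate.)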
+ if (MFI->getCalleeSavedInfo().size()) { + // If there are callee-saved registers, we want to store them efficiently as + // a block, and virtual base assignment happens too early to do it for us so + // we adjust the stack in two phases: first just for callee-saved fiddling, + // then to allocate the rest of the frame. + splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); + } else { + // If there aren't any callee-saved registers, two-phase adjustment is + // inefficient. It's more efficient to adjust with NumInitialBytes too + // because when we're in a "callee pops argument space" situation, that pop + // must be tacked onto Initial for correctness. + NumInitialBytes = MFI->getStackSize(); + NumResidualBytes = 0; + } + + // Tell everyone else how much adjustment we're expecting them to use. In + // particular if an adjustment is required for a tail call the epilogue could + // have a different view of things. + FuncInfo->setInitialStackAdjust(NumInitialBytes); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, + MachineInstr::FrameSetup); + + if (NeedsFrameMoves && NumInitialBytes) { + // We emit this update even if the CFA is set from a frame pointer later so + // that the CFA is valid in the interim. + MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(SPLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumInitialBytes); + Moves.push_back(MachineMove(SPLabel, Dst, Src)); + } + + // Otherwise we need to set the frame pointer and/or add a second stack + // adjustment. + + bool FPNeedsSetting = hasFP(MF); + for (; MBBI != MBB.end(); ++MBBI) { + // Note that this search makes strong assumptions about the operation used + // to store the frame-pointer: it must be "STP x29, x30, ...". This could + // change in future, but until then there's no point in implementing + // untestable more generic cases. + if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR + && MBBI->getOperand(0).getReg() == AArch64::X29) { + int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); + FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); + + ++MBBI; + emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, + AArch64::X29, + NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), + MachineInstr::FrameSetup); + + // The offset adjustment used when emitting debugging locations relative + // to whatever frame base is set. AArch64 uses the default frame base (FP + // or SP) and this adjusts the calculations to be correct. + MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) + - MFI->getStackSize()); + + if (NeedsFrameMoves) { + MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(FPLabel); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx)); + Moves.push_back(MachineMove(FPLabel, Dst, Src)); + } + + FPNeedsSetting = false; + } + + if (!MBBI->getFlag(MachineInstr::FrameSetup)) + break; + } + + assert(!FPNeedsSetting && "Frame pointer couldn't be set"); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, + MachineInstr::FrameSetup); + + // Now we emit the rest of the frame setup information, if necessary: we've + // already noted the FP and initial SP moves so we're left with the prologue's + // final SP update and callee-saved register locations. 
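+ // (Each MachineMove records, as of its label, where the CFA or a saved
+ // register can be found; they are later lowered to DWARF call-frame
+ // information for unwinding and debugging.)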
+ if (!NeedsFrameMoves)
+ return;
+
+ // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0;
+
+ // The rest of the stack adjustment.
+ if (!hasFP(MF) && NumResidualBytes) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+
+ // And any callee-saved registers (it's fine to leave them to the end here,
+ // because the old values are still valid at this point).
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.size()) {
+ if (!CSLabel) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+ }
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ MachineLocation Dst(MachineLocation::VirtualFP,
+ MFI->getObjectOffset(I->getFrameIdx()));
+ MachineLocation Src(I->getReg());
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+ }
+}
+
+void
+AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ // Initial and residual are named for consistency with the prologue. Note that
+ // in the epilogue, the residual adjustment is executed first.
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
+ uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == AArch64::TC_RETURNdi ||
+ RetOpcode == AArch64::TC_RETURNxi) {
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ MachineInstrBuilder MIB;
+ if (RetOpcode == AArch64::TC_RETURNdi) {
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
+ if (JumpTarget.isGlobal()) {
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else {
+ assert(JumpTarget.isSymbol() && "unexpected tail call destination");
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else {
+ assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
+ && "Unexpected tail call");
+
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
+ MIB.addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ // Add the extra operands onto the new tail call instruction even though
+ // they're not used directly (so that liveness is tracked properly etc).
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ MIB->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TC_RETURN.
+ MachineInstr *NewMI = prior(MBBI);
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments; this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. 
This will, of course, be zero for the C calling + // convention. + ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); + } + + assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 + && "refusing to adjust stack by misaligned amt"); + + // We may need to address callee-saved registers differently, so find out the + // bound on the frame indices. + const std::vector &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The "residual" stack update comes first from this direction and guarantees + // that SP is NumInitialBytes below its value on function entry, either by a + // direct update or restoring it from the frame pointer. + if (NumInitialBytes + ArgumentPopSize != 0) { + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, + NumInitialBytes + ArgumentPopSize); + --MBBI; + } + + + // MBBI now points to the instruction just past the last callee-saved + // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" + // otherwise). + + // Now we need to find out where to put the bulk of the stack adjustment + MachineBasicBlock::iterator FirstEpilogue = MBBI; + while (MBBI != MBB.begin()) { + --MBBI; + + unsigned FrameOp; + for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { + if (MBBI->getOperand(FrameOp).isFI()) + break; + } + + // If this instruction doesn't have a frame index we've reached the end of + // the callee-save restoration. + if (FrameOp == MBBI->getNumOperands()) + break; + + // Likewise if it *is* a local reference, but not to a callee-saved object. + int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); + if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) + break; + + FirstEpilogue = MBBI; + } + + if (MF.getFrameInfo()->hasVarSizedObjects()) { + int64_t StaticFrameBase; + StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); + emitRegUpdate(MBB, FirstEpilogue, DL, TII, + AArch64::XSP, AArch64::X29, AArch64::NoRegister, + StaticFrameBase); + } else { + emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); + } +} + +int64_t +AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, + int FrameIndex, + unsigned &FrameReg, + int SPAdj, + bool IsCalleeSaveOp) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); + + assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) + && "callee-saved register in unexpected place"); + + // If the frame for this function is particularly large, we adjust the stack + // in two phases which means the callee-save related operations see a + // different (intermediate) stack size. + int64_t FrameRegPos; + if (IsCalleeSaveOp) { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast(FuncInfo->getInitialStackAdjust()); + } else if (useFPForAddressing(MF)) { + // Have to use the frame pointer since we have no idea where SP is. + FrameReg = AArch64::X29; + FrameRegPos = FuncInfo->getFramePointerOffset(); + } else { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast(MFI->getStackSize()) + SPAdj; + } + + return TopOfFrameOffset - FrameRegPos; +} + +/// Estimate and return the size of the frame. +static unsigned estimateStackSize(MachineFunction &MF) { + // FIXME: Make generic? Really consider after upstreaming. This code is now + // shared between PEI, ARM *and* here. 
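+ // (The walk below mirrors PEI: take fixed objects, then ordinary objects,
+ // rounding Offset up to each object's alignment -- e.g. an Offset of 20
+ // with 8-byte alignment becomes 24 -- and finally rounding the total to
+ // the stack alignment.)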
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary.
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+ MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+ }
+
+ // If addressing of local variables is going to be more complicated than
+ // shoving a base register and an offset into the instruction then we may well
+ // need to scavenge registers. We should either specifically add a
+ // callee-save register for this purpose or allocate an extra spill slot.
+
+ bool BigStack =
+ (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+ || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+ if (!BigStack)
+ return;
+
+ // We certainly need some slack space for the scavenger, preferably an extra
+ // register.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ uint16_t ExtraReg = AArch64::NoRegister;
+
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+ !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+ ExtraReg = CSRegs[i];
+ break;
+ }
+ }
+
+ if (ExtraReg != 0) {
+ MF.getRegInfo().setPhysRegUsed(ExtraReg);
+ } else {
+ // Create a stack slot for scavenging purposes. PrologEpilogInserter
+ // helpfully places it near either SP or FP for us to avoid an
+ // infinite regression during scavenging. 
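+ // (The slot acts as an emergency spill: if a scratch register is needed
+ // at a point where every register is live, the scavenger spills one here
+ // and reloads it immediately after the enclosing sequence.)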
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } +} + +bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB, + unsigned Reg) const { + // If @llvm.returnaddress is called then it will refer to X30 by some means; + // the prologue store does not kill the register. + if (Reg == AArch64::X30) { + if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken() + && MBB.getParent()->getRegInfo().isLiveIn(Reg)) + return false; + } + + // In all other cases, physical registers are dead after they've been saved + // but live at the beginning of the prologue block. + MBB.addLiveIn(Reg); + return true; +} + +void +AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI, + LoadStoreMethod PossClasses[], + unsigned NumClasses) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // A certain amount of implicit contract is present here. The actual stack + // offsets haven't been allocated officially yet, so for strictly correct code + // we rely on the fact that the elements of CSI are allocated in order + // starting at SP, purely as dictated by size and alignment. In practice since + // this function handles the only accesses to those slots it's not quite so + // important. + // + // We have also ordered the callee-saved register list in AArch64CallingConv + // so that the above scheme puts registers in order: in particular we want + // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2). + for (unsigned i = 0, e = CSI.size(); i < e; ++i) { + unsigned Reg = CSI[i].getReg(); + + // First we need to find out which register class the register belongs to so + // that we can use the correct load/store instructions. + unsigned ClassIdx; + for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) { + if (PossClasses[ClassIdx].RegClass->contains(Reg)) + break; + } + assert(ClassIdx != NumClasses + && "Asked to store register in unexpected class"); + const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass; + + // Now we need to decide whether it's possible to emit a paired instruction: + // for this we want the next register to be in the same class. + MachineInstrBuilder NewMI; + bool Pair = false; + if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) { + Pair = true; + unsigned StLow = 0, StHigh = 0; + if (isPrologue) { + // Most of these registers will be live-in to the MBB and killed by our + // store, though there are exceptions (see determinePrologueDeath).
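+ // For example, if X29 and X30 are adjacent in CSI, the prologue gets a + // single paired store along the lines of "stp x29, x30, [sp, #offset]" + // instead of two separate str instructions.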
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg())); + StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); + } else { + StLow = RegState::Define; + StHigh = RegState::Define; + } + + NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode)) + .addReg(CSI[i+1].getReg(), StLow) + .addReg(CSI[i].getReg(), StHigh); + + // If it's a paired op, we've consumed two registers + ++i; + } else { + unsigned State; + if (isPrologue) { + State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); + } else { + State = RegState::Define; + } + + NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].SingleOpcode)) + .addReg(CSI[i].getReg(), State); + } + + // Note that the FrameIdx refers to the second register in a pair: it will + // be allocated the smaller numeric address and so is the one an LDP/STP + // address must use. + int FrameIdx = CSI[i].getFrameIdx(); + MachineMemOperand::MemOperandFlags Flags; + Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + Flags, + Pair ? TheClass.getSize() * 2 : TheClass.getSize(), + MFI.getObjectAlignment(FrameIdx)); + + NewMI.addFrameIndex(FrameIdx) + .addImm(0) // address-register offset + .addMemOperand(MMO); + + if (isPrologue) + NewMI.setMIFlags(MachineInstr::FrameSetup); + + // For aesthetic reasons, during an epilogue we want to emit complementary + // operations to the prologue, but in the opposite order. So we still + // iterate through the CalleeSavedInfo list in order, but we put the + // instructions successively earlier in the MBB. + if (!isPrologue) + --MBBI; + } +} + +bool +AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + static LoadStoreMethod PossibleClasses[] = { + {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, + {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, + }; + unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + + emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, + PossibleClasses, NumClasses); + + return true; +} + +bool +AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + + if (CSI.empty()) + return false; + + static LoadStoreMethod PossibleClasses[] = { + {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, + {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, + }; + unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + + emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, + PossibleClasses, NumClasses); + + return true; +} + +bool +AArch64FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // This is a decision of ABI compliance. The AArch64 PCS gives various options + // for conformance, and even at the most stringent level more or less permits + // elimination for leaf functions because there's no loss of functionality + // (for debugging etc).. 
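+ // For example, compiling with frame-pointer elimination disabled (as is + // typical at -O0) makes any function that contains calls keep X29 as a frame + // pointer.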
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls()) + return true; + + // The following are hard-limits: incorrect code will be generated if we try + // to omit the frame. + return (RI->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); +} + +bool +AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const { + return MF.getFrameInfo()->hasVarSizedObjects(); +} + +bool +AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Of the various reasons for having a frame pointer, it's actually only + // variable-sized objects that prevent reservation of a call frame. + return !(hasFP(MF) && MFI->hasVarSizedObjects()); +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h new file mode 100644 index 0000000..dfa66ec --- /dev/null +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -0,0 +1,103 @@ +//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_FRAMEINFO_H +#define LLVM_AARCH64_FRAMEINFO_H + +#include "AArch64Subtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class AArch64Subtarget; + +class AArch64FrameLowering : public TargetFrameLowering { +private: + // In order to unify the spilling and restoring of callee-saved registers into + // emitFrameMemOps, we need to be able to specify which instructions to use + // for the relevant memory operations on each register class. An array of the + // following struct is populated and passed in to achieve this. + struct LoadStoreMethod { + const TargetRegisterClass *RegClass; // E.g. GPR64RegClass + + // The preferred instruction. + unsigned PairOpcode; // E.g. LSPair64_STR + + // Sometimes only a single register can be handled at once. + unsigned SingleOpcode; // E.g. LS64_STR + }; +protected: + const AArch64Subtarget &STI; + +public: + explicit AArch64FrameLowering(const AArch64Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Decides how much stack adjustment to perform in each phase of the prologue + /// and epilogue. + void splitSPAdjustments(uint64_t Total, uint64_t &Initial, + uint64_t &Residual) const; + + int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex, + unsigned &FrameReg, int SPAdj, + bool IsCalleeSaveOp) const; + + virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const; + + /// If the register is X30 (i.e. 
LR) and the return address is used in the + /// function then the callee-save store doesn't actually kill the register, + /// otherwise it does. + bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const; + + /// This function emits the loads or stores required during prologue and + /// epilogue as efficiently as possible. + /// + /// The operations involved in setting up and tearing down the frame are + /// similar enough to warrant a shared function, particularly as discrepancies + /// between the two would be disastrous. + void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI, + LoadStoreMethod PossibleClasses[], + unsigned NumClasses) const; + + + virtual bool hasFP(const MachineFunction &MF) const; + + virtual bool useFPForAddressing(const MachineFunction &MF) const; + + /// On AArch64 it is only variable-sized objects that prevent reservation of + /// a call frame. + virtual bool hasReservedCallFrame(const MachineFunction &MF) const; + +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp new file mode 100644 index 0000000..9be8ba1 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -0,0 +1,422 @@ +//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the AArch64 target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-isel" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===--------------------------------------------------------------------===// +/// AArch64 specific code to select AArch64 machine instructions for +/// SelectionDAG operations. +/// +namespace { + +class AArch64DAGToDAGISel : public SelectionDAGISel { + AArch64TargetMachine &TM; + const AArch64InstrInfo *TII; + + /// Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when generating code for different targets. + const AArch64Subtarget *Subtarget; + +public: + explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), + TII(static_cast<const AArch64InstrInfo *>(TM.getInstrInfo())), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { + } + + virtual const char *getPassName() const { + return "AArch64 Instruction Selection"; + } + + // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc" + + template + bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) { + const ConstantSDNode *CN = dyn_cast(N); + if (!CN || CN->getZExtValue() % MemSize != 0 + || CN->getZExtValue() / MemSize > 0xfff) + return false; + + UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64); + return true; + } + + template + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { + return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); + } + + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); + + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps); + + bool SelectLogicalImm(SDValue N, SDValue &Imm); + + template + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) { + return SelectTSTBOperand(N, FixedPos, RegWidth); + } + + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + + SDNode *TrySelectToMoveImm(SDNode *N); + SDNode *SelectToLitPool(SDNode *N); + SDNode *SelectToFPLitPool(SDNode *N); + + SDNode* Select(SDNode*); +private: +}; +} + +bool +AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantFPSDNode *CN = dyn_cast(N); + if (!CN) return false; + + // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits + // is between 1 and 32 for a destination w-register, or 1 and 64 for an + // x-register. + // + // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we + // want THIS_NODE to be 2^fbits. This is much easier to deal with using + // integers. + bool IsExact; + + // fbits is between 1 and 64 in the worst-case, which means the fmul + // could have 2^64 as an actual operand. Need 65 bits of precision. + APSInt IntVal(65, true); + CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); + + // N.b. isPowerOf2 also checks for > 0. + if (!IsExact || !IntVal.isPowerOf2()) return false; + unsigned FBits = IntVal.logBase2(); + + // Checks above should have guaranteed that we haven't lost information in + // finding FBits, but it must still be in range. + if (FBits == 0 || FBits > RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32); + return true; +} + +bool +AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) { + switch (ConstraintCode) { + default: llvm_unreachable("Unrecognised AArch64 memory constraint"); + case 'm': + // FIXME: more freedom is actually permitted for 'm'. We can go + // hunting for a base and an offset if we want. Of course, since + // we don't really know how the operand is going to be used we're + // probably restricted to the load/store pair's simm7 as an offset + // range anyway. + case 'Q': + OutOps.push_back(Op); + } + + return false; +} + +bool +AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { + ConstantFPSDNode *Imm = dyn_cast(N); + if (!Imm || !Imm->getValueAPF().isPosZero()) + return false; + + // Doesn't actually carry any information, but keeps TableGen quiet. 
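+ // Patterns can then use this operand to match a floating-point compare + // against zero directly, e.g. selecting something like "fcmp s0, #0.0" for a + // setcc against +0.0.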
+ Dummy = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { + uint32_t Bits; + uint32_t RegWidth = N.getValueType().getSizeInBits(); + + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) return false; + + if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits)) + return false; + + Imm = CurDAG->getTargetConstant(Bits, MVT::i32); + return true; +} + +SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { + SDNode *ResNode; + DebugLoc dl = Node->getDebugLoc(); + EVT DestType = Node->getValueType(0); + unsigned DestWidth = DestType.getSizeInBits(); + + unsigned MOVOpcode; + EVT MOVType; + int UImm16, Shift; + uint32_t LogicalBits; + + uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue(); + if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) { + MOVType = DestType; + MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii; + } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) { + MOVType = DestType; + MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii; + } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) { + // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can + // use a 32-bit instruction: "movn w0, 0xedcb". + MOVType = MVT::i32; + MOVOpcode = AArch64::MOVNwii; + } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) { + MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi; + uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR; + + return CurDAG->getMachineNode(MOVOpcode, dl, DestType, + CurDAG->getRegister(ZR, DestType), + CurDAG->getTargetConstant(LogicalBits, MVT::i32)); + } else { + // Can't handle it in one instruction. There's scope for permitting two (or + // more) instructions, but that'll need more thought. + return NULL; + } + + ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType, + CurDAG->getTargetConstant(UImm16, MVT::i32), + CurDAG->getTargetConstant(Shift, MVT::i32)); + + if (MOVType != DestType) { + ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, + MVT::i64, MVT::i32, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + SDValue(ResNode, 0), + CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); + } + + return ResNode; +} + +SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); + uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); + int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); + EVT DestType = Node->getValueType(0); + + // Since we may end up loading a 64-bit constant from a 32-bit entry the + // constant in the pool may have a different type to the eventual node.
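+ // For example (values purely illustrative): an i64 0xdeadbeef fits an + // unsigned 32-bit entry, so "ldr w3, lbl" plus implicit zero-extension is + // enough; -123456789 fits a signed 32-bit entry and can use "ldrsw x3, lbl"; + // 0x123456789a needs a full 64-bit entry and "ldr x3, lbl".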
+ SDValue PoolEntry; + EVT LoadType; + unsigned LoadInst; + + assert((DestType == MVT::i64 || DestType == MVT::i32) + && "Only expect integer constants at the moment"); + + if (DestType == MVT::i32 || UnsignedVal <= UINT32_MAX) { + // LDR w3, lbl + LoadInst = AArch64::LDRw_lit; + LoadType = MVT::i32; + + PoolEntry = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), UnsignedVal), + MVT::i32); + } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { + // We can use a sign-extending 32-bit load: LDRSW x3, lbl + LoadInst = AArch64::LDRSWx_lit; + LoadType = MVT::i64; + + PoolEntry = CurDAG->getTargetConstantPool( + ConstantInt::getSigned(Type::getInt32Ty(*CurDAG->getContext()), + SignedVal), + MVT::i32); + } else { + // Full 64-bit load needed: LDR x3, lbl + LoadInst = AArch64::LDRx_lit; + LoadType = MVT::i64; + + PoolEntry = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), UnsignedVal), + MVT::i64); + } + + SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl, + LoadType, MVT::Other, + PoolEntry, CurDAG->getEntryNode()); + + if (DestType != LoadType) { + // We used the implicit zero-extension of "LDR w3, lbl", tell LLVM this + // fact. + assert(DestType == MVT::i64 && LoadType == MVT::i32 + && "Unexpected load combination"); + + ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, + MVT::i64, MVT::i32, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + SDValue(ResNode, 0), + CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); + } + + return ResNode; +} + +SDNode *AArch64DAGToDAGISel::SelectToFPLitPool(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); + const ConstantFP *FV = cast(Node)->getConstantFPValue(); + EVT DestType = Node->getValueType(0); + + unsigned LoadInst; + switch (DestType.getSizeInBits()) { + case 32: + LoadInst = AArch64::LDRs_lit; + break; + case 64: + LoadInst = AArch64::LDRd_lit; + break; + case 128: + LoadInst = AArch64::LDRq_lit; + break; + default: llvm_unreachable("cannot select floating-point litpool"); + } + + SDValue PoolEntry = CurDAG->getTargetConstantPool(FV, DestType); + SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl, + DestType, MVT::Other, + PoolEntry, CurDAG->getEntryNode()); + + return ResNode; +} + +bool +AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantSDNode *CN = dyn_cast(N); + if (!CN) return false; + + uint64_t Val = CN->getZExtValue(); + + if (!isPowerOf2_64(Val)) return false; + + unsigned TestedBit = Log2_64(Val); + // Checks above should have guaranteed that we haven't lost information in + // finding TestedBit, but it must still be in range. + if (TestedBit >= RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64); + return true; +} + +SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); + + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + return NULL; + } + + switch (Node->getOpcode()) { + case ISD::FrameIndex: { + int FI = cast(Node)->getIndex(); + EVT PtrTy = TLI.getPointerTy(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); + return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, + TFI, CurDAG->getTargetConstant(0, PtrTy)); + } + case ISD::ConstantPool: { + // Constant pools are fine, just create a Target entry. 
+ ConstantPoolSDNode *CN = cast(Node); + const Constant *C = CN->getConstVal(); + SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0)); + + ReplaceUses(SDValue(Node, 0), CP); + return NULL; + } + case ISD::Constant: { + SDNode *ResNode = 0; + if (cast(Node)->getZExtValue() == 0) { + // XZR and WZR are probably even better than an actual move: most of the + // time they can be folded into another instruction with *no* cost. + + EVT Ty = Node->getValueType(0); + assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type"); + uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR; + ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + Node->getDebugLoc(), + Register, Ty).getNode(); + } + + // Next best option is a move-immediate, see if we can do that. + if (!ResNode) { + ResNode = TrySelectToMoveImm(Node); + } + + // If even that fails we fall back to a lit-pool entry at the moment. Future + // tuning or restrictions like non-readable code-sections may mandate a + // sequence of MOVZ/MOVN/MOVK instructions. + if (!ResNode) { + ResNode = SelectToLitPool(Node); + } + + assert(ResNode && "We need *some* way to materialise a constant"); + + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + return NULL; + } + case ISD::ConstantFP: { + if (A64Imms::isFPImm(cast(Node)->getValueAPF())) { + // FMOV will take care of it from TableGen + break; + } + + SDNode *ResNode = SelectToFPLitPool(Node); + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + return NULL; + } + default: + break; // Let generic code handle it + } + + SDNode *ResNode = SelectCode(Node); + + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << "\n"); + + return ResNode; +} + +/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for +/// instruction scheduling. +FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new AArch64DAGToDAGISel(TM, OptLevel); +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp new file mode 100644 index 0000000..42e8f09 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -0,0 +1,2957 @@ +//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-isel" +#include "AArch64.h" +#include "AArch64ISelLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" + +using namespace llvm; + +static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { + const AArch64Subtarget *Subtarget = &TM.getSubtarget(); + + if (Subtarget->isTargetLinux()) + return new AArch64LinuxTargetObjectFile(); + if (Subtarget->isTargetELF()) + return new TargetLoweringObjectFileELF(); + llvm_unreachable("unknown subtarget type"); +} + + +AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) + : TargetLowering(TM, createTLOF(TM)), + Subtarget(&TM.getSubtarget()), + RegInfo(TM.getRegisterInfo()), + Itins(TM.getInstrItineraryData()) { + + // SIMD compares set the entire lane's bits to 1 + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Scalar register <-> type mapping + addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); + addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + + // And the vectors + addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); + + computeRegisterProperties(); + + // Some atomic operations can be folded into load-acquire or store-release + // instructions on AArch64. It's marginally simpler to let LLVM expand + // everything out to a barrier and then recombine the (few) barriers we can. + setInsertFencesForAtomic(true); + setTargetDAGCombine(ISD::ATOMIC_FENCE); + setTargetDAGCombine(ISD::ATOMIC_STORE); + + // We combine OR nodes for bitfield and NEON BSL operations. + setTargetDAGCombine(ISD::OR); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRA); + + // AArch64 does not have i1 loads, or much of anything for i1 really. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + + setStackPointerRegisterToSaveRestore(AArch64::XSP); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + + // We'll lower globals to wrappers for selection. 
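+ // For the small code model the wrapper is expected to become an ADRP page + // address plus a :lo12: offset, roughly: + // adrp x0, var + // add x0, x0, #:lo12:var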
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + + // A64 instructions have the comparison predicate attached to the user of the + // result, but having a separate comparison is valuable for matching. + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + // Legal floating-point operations. + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Legal); + + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Legal); + + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + + setOperationAction(ISD::FSQRT, MVT::f32, Legal); + setOperationAction(ISD::FSQRT, MVT::f64, Legal); + + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f128, Legal); + + // Illegal floating-point operations. 
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + + setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + + + // Virtually no operation on f128 is legal, but LLVM can't expand them when + // there's a valid register class, so we need custom operations in most cases. + setOperationAction(ISD::FABS, MVT::f128, Expand); + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + setOperationAction(ISD::SETCC, MVT::f128, Custom); + setOperationAction(ISD::BR_CC, MVT::f128, Custom); + setOperationAction(ISD::SELECT, MVT::f128, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + + // Lowering for many of the conversions is actually specified by the non-f128 + // type. The LowerXXX function will be trivial when f128 isn't involved. 
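+ // For example, an i32 FP_TO_SINT whose source is f128 should end up as a + // __fixtfsi libcall, while the same node with an f64 source is simply left + // for the patterns to match.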
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + + // This prevents LLVM trying to compress double constants into a floating + // constant-pool entry and trying to load from there. It's of doubtful benefit + // for A64: we'd need LDR followed by FCVT, I believe. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + + setExceptionPointerRegister(AArch64::X0); + setExceptionSelectorRegister(AArch64::X1); +} + +EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { + // It's reasonably important that this value matches the "natural" legal + // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself + // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). + if (!VT.isVector()) return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, + unsigned &strOpc) { + switch (Size) { + default: llvm_unreachable("unsupported size for atomic binary op!"); + case 1: + ldrOpc = AArch64::LDXR_byte; + strOpc = AArch64::STXR_byte; + break; + case 2: + ldrOpc = AArch64::LDXR_hword; + strOpc = AArch64::STXR_hword; + break; + case 4: + ldrOpc = AArch64::LDXR_word; + strOpc = AArch64::STXR_word; + break; + case 8: + ldrOpc = AArch64::LDXR_dword; + strOpc = AArch64::STXR_dword; + break; + } +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, + unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
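+ // For example, the IR "atomicrmw add i32* %p, i32 %v seq_cst" arrives here + // as ATOMIC_LOAD_ADD_I32 with BinOpcode == AArch64::ADDwww_lsl, while an + // "atomicrmw xchg" arrives with BinOpcode == 0 and simply stores the + // incoming value.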
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + const TargetRegisterClass *TRC + = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // scratch, dest, incr + // stxr stxr_status, scratch, ptr + // cmp stxr_status, #0 + // b.ne loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (BinOpcode) { + // All arithmetic operations we'll be creating are designed to take an extra + // shift or extend operand, which we can conveniently set to zero. + + // Operand order needs to go the other way for NAND. + if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(incr).addReg(dest).addImm(0); + else + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(dest).addReg(incr).addImm(0); + } + + // From the stxr, the register is GPR32; from the cmp it's GPR32wsp + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. 
+ + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + unsigned CmpOp, + A64CC::CondCodes Cond) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + unsigned oldval = dest; + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRC, *TRCsp; + if (Size == 8) { + TRC = &AArch64::GPR64RegClass; + TRCsp = &AArch64::GPR64xspRegClass; + } else { + TRC = &AArch64::GPR32RegClass; + TRCsp = &AArch64::GPR32wspRegClass; + } + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + unsigned scratch = MRI.createVirtualRegister(TRC); + MRI.constrainRegClass(scratch, TRCsp); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // cmp incr, dest (, sign extend if necessary) + // csel scratch, dest, incr, cond + // stxr stxr_status, scratch, ptr + // cmp stxr_status, #0 + // b.ne loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + // Build compare and cmov instructions. + MRI.constrainRegClass(incr, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(incr).addReg(oldval).addImm(0); + + BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc), + scratch) + .addReg(oldval).addReg(incr).addImm(Cond); + + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status) + .addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const { + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned oldval = MI->getOperand(2).getReg(); + unsigned newval = MI->getOperand(3).getReg(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRCsp; + TRCsp = Size == 8 ? 
&AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; // insert the new blocks after the current block + + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // ldxr dest, [ptr] + // cmp dest, oldval + // b.ne exitMBB + BB = loop1MBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl; + MRI.constrainRegClass(dest, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(dest).addReg(oldval).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + + // loop2MBB: + // stxr stxr_status, newval, [ptr] + // cmp stxr_status, #0 + // b.ne loop1MBB + BB = loop2MBB; + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loop1MBB); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, + MachineBasicBlock *MBB) const { + // We materialise the F128CSEL pseudo-instruction using conditional branches + // and loads, giving an instruction sequence like: + // str q0, [sp] + // b.ne IfTrue + // b Finish + // IfTrue: + // str q1, [sp] + // Finish: + // ldr q0, [sp] + // + // Using virtual registers would probably not be beneficial since COPY + // instructions are expensive for f128 (there's no actual instruction to + // implement them). + // + // An alternative would be to do an integer-CSEL on some address. E.g.: + // mov x0, sp + // add x1, sp, #16 + // str q0, [x0] + // str q1, [x1] + // csel x0, x0, x1, ne + // ldr q0, [x0] + // + // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineFunction *MF = MBB->getParent(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction::iterator It = MBB; + ++It; + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned IfTrueReg = MI->getOperand(1).getReg(); + unsigned IfFalseReg = MI->getOperand(2).getReg(); + unsigned CondCode = MI->getOperand(3).getImm(); + bool NZCVKilled = MI->getOperand(4).isKill(); + + MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, TrueBB); + MF->insert(It, EndBB); + + // Transfer rest of current basic-block to EndBB + EndBB->splice(EndBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndBB->transferSuccessorsAndUpdatePHIs(MBB); + + // We need somewhere to store the f128 value needed. + int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); + + // [... start of incoming MBB ...] + // str qIFFALSE, [sp] + // b.cc IfTrue + // b Done + BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfFalseReg) + .addFrameIndex(ScratchFI) + .addImm(0); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)) + .addImm(CondCode) + .addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::Bimm)) + .addMBB(EndBB); + MBB->addSuccessor(TrueBB); + MBB->addSuccessor(EndBB); + + // IfTrue: + // str qIFTRUE, [sp] + BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfTrueReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the + // blocks. + TrueBB->addSuccessor(EndBB); + + // Done: + // ldr qDEST, [sp] + // [... rest of incoming MBB ...] + if (!NZCVKilled) + EndBB->addLiveIn(AArch64::NZCV); + MachineInstr *StartOfEnd = EndBB->begin(); + BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + MI->eraseFromParent(); + return EndBB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unhandled instruction with custom inserter"); + case AArch64::F128CSEL: + return EmitF128CSEL(MI, MBB); + case AArch64::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); + + case AArch64::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); + + case AArch64::ATOMIC_LOAD_AND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); + + case AArch64::ATOMIC_LOAD_OR_I8: + return emitAtomicBinary(MI, MBB, 
1, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); + + case AArch64::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); + + case AArch64::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); + + case AArch64::ATOMIC_LOAD_MIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); + + case AArch64::ATOMIC_LOAD_MAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); + + case AArch64::ATOMIC_LOAD_UMIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); + + case AArch64::ATOMIC_LOAD_UMAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); + + case AArch64::ATOMIC_SWAP_I8: + return emitAtomicBinary(MI, MBB, 1, 0); + case AArch64::ATOMIC_SWAP_I16: + return emitAtomicBinary(MI, MBB, 2, 0); + case AArch64::ATOMIC_SWAP_I32: + return emitAtomicBinary(MI, MBB, 4, 0); + case AArch64::ATOMIC_SWAP_I64: + return emitAtomicBinary(MI, MBB, 8, 0); + + case AArch64::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwap(MI, MBB, 1); + case AArch64::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwap(MI, MBB, 2); + case AArch64::ATOMIC_CMP_SWAP_I32: + return emitAtomicCmpSwap(MI, MBB, 4); + case AArch64::ATOMIC_CMP_SWAP_I64: + return emitAtomicCmpSwap(MI, MBB, 8); + } +} + + +const 
char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; + case AArch64ISD::Call: return "AArch64ISD::Call"; + case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; + case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; + case AArch64ISD::BFI: return "AArch64ISD::BFI"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::Ret: return "AArch64ISD::Ret"; + case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; + case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; + case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; + + default: return NULL; + } +} + +static const uint16_t AArch64FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 +}; +static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); + +static const uint16_t AArch64ArgRegs[] = { + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, + AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 +}; +static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); + +static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + // Mark all remaining general purpose registers as allocated. We don't + // backtrack: if (for example) an i128 gets put on the stack, no subsequent + // i64 will go in registers (C.11). + for (unsigned i = 0; i < NumArgRegs; ++i) + State.AllocateReg(AArch64ArgRegs[i]); + + return false; +} + +#include "AArch64GenCallingConv.inc" + +CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { + + switch(CC) { + default: llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + return CC_A64_APCS; + } +} + +void +AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + AArch64MachineFunctionInfo *FuncInfo = MF.getInfo(); + + SmallVector MemOps; + + unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, + NumArgRegs); + unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, + NumFPRArgRegs); + + unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); + int GPRIdx = 0; + if (GPRSaveSize != 0) { + GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); + + SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 8), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + } + } + + unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); + int FPRIdx = 0; + if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); + + SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + + for (unsigned i = 
FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], + &AArch64::FPR128RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + } + + int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true); + + FuncInfo->setVariadicStackIdx(StackIdx); + FuncInfo->setVariadicGPRIdx(GPRIdx); + FuncInfo->setVariadicGPRSize(GPRSaveSize); + FuncInfo->setVariadicFPRIdx(FPRIdx); + FuncInfo->setVariadicFPRSize(FPRSaveSize); + + if (!MemOps.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], + MemOps.size()); + } +} + + +SDValue +AArch64TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); + + SmallVector ArgValues; + + SDValue ArgValue; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + if (Flags.isByVal()) { + // Byval is used for small structs and HFAs in the PCS, but the system + // should work in a non-compliant manner for larger structs. 
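+ // For example (sizes purely illustrative): a 24-byte byval struct gives + // NumRegs == 3 below, i.e. a 24-byte fixed object at the caller-chosen + // offset, and the callee receives just its address.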
+ EVT PtrTy = getPointerTy(); + int Size = Flags.getByValSize(); + unsigned NumRegs = (Size + 7) / 8; + + unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, + VA.getLocMemOffset(), + false); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + InVals.push_back(FrameIdxN); + + continue; + } else if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + const TargetRegisterClass *RC = getRegClassFor(RegVT); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + } else { // VA.isRegLoc() + assert(VA.isMemLoc()); + + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + + + } + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned DestSize = VA.getValVT().getSizeInBits(); + unsigned DestSubReg; + + switch (DestSize) { + case 8: DestSubReg = AArch64::sub_8; break; + case 16: DestSubReg = AArch64::sub_16; break; + case 32: DestSubReg = AArch64::sub_32; break; + case 64: DestSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + VA.getValVT(), ArgValue, + DAG.getTargetConstant(DestSubReg, MVT::i32)), + 0); + break; + } + } + + InVals.push_back(ArgValue); + } + + if (isVarArg) + SaveVarArgRegisters(CCInfo, DAG, dl, Chain); + + unsigned StackArgSize = CCInfo.getNextStackOffset(); + if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { + // This is a non-standard ABI so by fiat I say we're allowed to make full + // use of the stack area to be popped, which must be aligned to 16 bytes in + // any case: + StackArgSize = RoundUpToAlignment(StackArgSize, 16); + + // If we're expected to restore the stack (e.g. fastcc) then we'll be adding + // a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackArgSize); + + // This realignment carries over to the available bytes below. Our own + // callers will guarantee the space is free by giving an aligned value to + // CALLSEQ_START. + } + // Even if we're not expected to free up the space, it's useful to know how + // much is there while considering tail calls (because we can reuse it). + FuncInfo->setBytesInStackArgArea(StackArgSize); + + return Chain; +} + +SDValue +AArch64TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to a location. + SmallVector RVLocs; + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze outgoing return values. + CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. 
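Before the return lowering continues, a quick illustration of the 16-byte rounding that LowerFormalArguments applies above to the popped argument area. This is an editorial sketch assuming RoundUpToAlignment behaves as shown for power-of-two alignments:

#include <cassert>
#include <cstdint>

// What RoundUpToAlignment(x, 16) computes for the stack-argument area above.
static uint64_t roundUpTo16(uint64_t bytes) {
  return (bytes + 15) & ~uint64_t(15);
}

int main() {
  assert(roundUpTo16(0) == 0);
  assert(roundUpTo16(8) == 16);
  assert(roundUpTo16(40) == 48); // e.g. five i64 stack arguments
}

As the comment above says, the first return lowered adds the result registers to the function's liveout set: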
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + // PCS: "If the type, T, of the result of a function is such that void func(T + // arg) would require that arg be passed as a value in a register (or set of + // registers) according to the rules in 5.4, then the result is returned in + // the same registers as would be used for such an argument. + // + // Otherwise, the caller shall reserve a block of memory of sufficient + // size and alignment to hold the result. The address of the memory block + // shall be passed as an additional argument to the function in x8." + // + // This is implemented in two places. The register-return values are dealt + // with here, more complex returns are passed as an sret parameter, which + // means we don't have to worry about it during actual return. + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); + + + SDValue Arg = OutVals[i]; + + // There's no convenient note in the ABI about this as there is for normal + // arguments, but it says return values are passed in the same registers as + // an argument would be. I believe that includes the comments about + // unspecified higher bits, putting the burden of widening on the *caller* + // for return values. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: + // Floating-point values should only be extended when they're going into + // memory, which can't happen here so an integer extend is acceptable. 
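To see why an integer ANY_EXTEND is enough here, compare the three widening flavours on a concrete i8 value (editorial sketch, illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  int8_t v = -1;              // bit pattern 0xFF
  int32_t sext = v;           // sign-extend: 0xFFFFFFFF
  uint32_t zext = uint8_t(v); // zero-extend: 0x000000FF
  // An "any extend" leaves bits 8..31 unspecified; since the PCS puts the
  // burden of widening on the consumer, unspecified high bits are fine.
  std::printf("%x %x\n", (unsigned)sext, zext);
}

Hence the single ANY_EXTEND that follows: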
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + } + + if (Flag.getNode()) { + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain, Flag); + } else { + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain); + } +} + +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); + bool IsSibCall = false; + + if (IsTailCall) { + IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + + // A sibling call is one where we're under the usual C ABI and not planning + // to change that but can still do a tail call: + if (!TailCallOpt && IsTailCall) + IsSibCall = true; + } + + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + + // On AArch64 (and all other architectures I'm aware of) the most this has to + // do is adjust the stack pointer. + unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); + if (IsSibCall) { + // Since we're not changing the ABI to make this a tail call, the memory + // operands are already available in the caller's incoming argument space. + NumBytes = 0; + } + + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. Completely unused for non-tail calls. + int FPDiff = 0; + + if (IsTailCall && !IsSibCall) { + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // can actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. 
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, getPointerTy()); + + SmallVector MemOpChains; + SmallVector, 8> RegsToPass; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue Arg = OutVals[i]; + + // Callee does the actual widening, so all extensions just use an implicit + // definition of the rest of the Loc. Aesthetically, this would be nicer as + // an ANY_EXTEND, but that isn't valid for floating-point types and this + // alternative works on integer types too. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned SrcSize = VA.getValVT().getSizeInBits(); + unsigned SrcSubReg; + + switch (SrcSize) { + case 8: SrcSubReg = AArch64::sub_8; break; + case 16: SrcSubReg = AArch64::sub_16; break; + case 32: SrcSubReg = AArch64::sub_32; break; + case 64: SrcSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, + VA.getLocVT(), + DAG.getUNDEF(VA.getLocVT()), + Arg, + DAG.getTargetConstant(SrcSubReg, MVT::i32)), + 0); + + break; + } + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // A normal register (sub-) argument. For now we just note it down because + // we want to copy things into registers as late as possible to avoid + // register-pressure (and possibly worse). + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + + assert(VA.isMemLoc() && "unexpected argument location"); + + SDValue DstAddr; + MachinePointerInfo DstInfo; + if (IsTailCall) { + uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : + VA.getLocVT().getSizeInBits(); + OpSize = (OpSize + 7) / 8; + int32_t Offset = VA.getLocMemOffset() + FPDiff; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + + DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstInfo = MachinePointerInfo::getFixedStack(FI); + + // Make sure any stack arguments overlapping with where we're storing are + // loaded before this eventual operation. Otherwise they'll be clobbered. + Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); + } else { + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()); + + DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); + } + + if (Flags.isByVal()) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); + SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, + Flags.getByValAlign(), + /*isVolatile = */ false, + /*alwaysInline = */ false, + DstInfo, MachinePointerInfo(0)); + MemOpChains.push_back(Cpy); + } else { + // Normal stack argument, put it where it's needed. + SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, + false, false, 0); + MemOpChains.push_back(Store); + } + } + + // The loads and stores generated above shouldn't clash with each + // other. Combining them with this TokenFactor notes that fact for the rest of + // the backend. 
+ if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Most of the rest of the instructions need to be glued together; we don't + // want assignments to actual registers used by a call to be rearranged by a + // well-meaning scheduler. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // The linker is responsible for inserting veneers when necessary to put a + // function call destination in range, so we don't need to bother with a + // wrapper here. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } + + // We don't usually want to end the call-sequence here because we would tidy + // the frame up *after* the call, however in the ABI-changing tail-call case + // we've carefully laid out the parameters so that when sp is reset they'll be + // in the correct location. + if (IsTailCall && !IsSibCall) { + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + } + + // We produce the following DAG scheme for the actual call instruction: + // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag? + // + // Most arguments aren't going to be used and just keep the values live as + // far as LLVM is concerned. It's expected to be selected as simply "bl + // callee" (for a direct, non-tail call). + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + if (IsTailCall) { + // Each tail call may have to adjust the stack by a different amount, so + // this information must travel along with the operation for eventual + // consumption by emitEpilogue. + Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); + } + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + + // Add a register mask operand representing the call-preserved registers. This + // is used later in codegen to constrain register-allocation. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // If we needed glue, put it in as the last argument. + if (InFlag.getNode()) + Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + if (IsTailCall) { + return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + } + + Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Now we can reclaim the stack, just as well do it before working out where + // our return value is. + if (!IsSibCall) { + uint64_t CalleePopBytes + = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? 
NumBytes : 0; + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(CalleePopBytes, true), + InFlag); + InFlag = Chain.getValue(1); + } + + return LowerCallResult(Chain, InFlag, CallConv, + IsVarArg, Ins, dl, DAG, InVals); +} + +SDValue +AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + // Assign locations to each value returned by this call. + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); + + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + // Return values that are too big to fit into registers should use an sret + // pointer, so this can be a lot simpler than the main argument code. + assert(VA.isRegLoc() && "Memory locations not expected for call return"); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); + break; + case CCValAssign::ZExt: + case CCValAssign::SExt: + case CCValAssign::AExt: + // Floating-point arguments only get extended/truncated if they're going + // in memory, so using the integer operation is acceptable here. + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); + } + + return Chain; +} + +bool +AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + SelectionDAG& DAG) const { + + // For CallingConv::C this function knows whether the ABI needs + // changing. That's not true for other conventions so they will have to opt in + // manually. + if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + return false; + + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible (see + // X86) but less efficient and uglier in LowerCall. + for (Function::const_arg_iterator i = CallerF->arg_begin(), + e = CallerF->arg_end(); i != e; ++i) + if (i->hasByValAttr()) + return false; + + if (getTargetMachine().Options.GuaranteedTailCallOpt) { + if (IsTailCallConvention(CalleeCC) && CCMatch) + return true; + return false; + } + + // Now we search for cases where we can use a tail call without changing the + // ABI. Sibcall is used in some places (particularly gcc) to refer to this + // concept. + + // I want anyone implementing a new calling convention to think long and hard + // about this assert. 
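The eligibility checks in this function can be summarized as a small predicate. This is an editorial paraphrase with hypothetical field names, not the patch's API (illustration only):

// Rough shape of IsEligibleForTailCallOptimization's early checks.
struct CallDescr {
  bool ccIsFast, ccIsC, ccMatchesCaller, callerHasByVal, guaranteedTCO;
};

static bool roughlyEligible(const CallDescr &c) {
  if (!c.ccIsFast && !c.ccIsC)
    return false;                           // only fastcc and C are understood
  if (c.callerHasByVal)
    return false;                           // byval points into the area we'd reuse
  if (c.guaranteedTCO)
    return c.ccIsFast && c.ccMatchesCaller; // ABI-changing tail call
  return true;                              // fall through to the sibcall checks
}

The assert that follows guards the variadic corner of those checks: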
+ assert((!IsVarArg || CalleeCC == CallingConv::C) + && "Unexpected variadic calling convention"); + + if (IsVarArg && !Outs.empty()) { + // At least two cases here: if caller is fastcc then we can't have any + // memory arguments (we'd be expected to clean up the stack afterwards). If + // caller is C then we could potentially use its argument area. + + // FIXME: for now we take the most conservative of these in both cases: + // disallow all variadic memory operands. + SmallVector ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) + if (!ArgLocs[i].isRegLoc()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector RVLocs1; + CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); + + SmallVector RVLocs2; + CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // Nothing more to check if the callee is taking no arguments + if (Outs.empty()) + return true; + + SmallVector ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + + const AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo(); + + // If the stack arguments for this call would fit into our own save area then + // the call can be made tail. + return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); +} + +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { + return CallCC == CallingConv::Fast && TailCallOpt; +} + +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { + return CallCC == CallingConv::Fast; +} + +SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, + SelectionDAG &DAG, + MachineFrameInfo *MFI, + int ClobberedFI) const { + SmallVector ArgChains; + int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); + int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. 
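The loop below needs an overlap test between the clobbered byte range and each incoming argument load; two closed intervals overlap iff each one starts no later than the other ends. An editorial sketch of that predicate (illustration only):

#include <cassert>
#include <cstdint>

static bool overlaps(int64_t a0, int64_t a1, int64_t b0, int64_t b1) {
  return (a0 <= b0 && b0 <= a1) || (b0 <= a0 && a0 <= b1);
}

int main() {
  assert(overlaps(0, 7, 4, 11));  // partial overlap
  assert(!overlaps(0, 7, 8, 15)); // adjacent ranges don't overlap
}

The original chain is pushed first, then one chain value is added per overlapping load: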
+ ArgChains.push_back(Chain); + + // Add a chain value for each stack argument corresponding + for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), + UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast(*U)) + if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) + if (FI->getIndex() < 0) { + int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex()); + int64_t InLastByte = InFirstByte; + InLastByte += MFI->getObjectSize(FI->getIndex()) - 1; + + if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || + (FirstByte <= InFirstByte && InFirstByte <= LastByte)) + ArgChains.push_back(SDValue(L, 1)); + } + + // Build a tokenfactor for all the chains. + return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: return A64CC::EQ; + case ISD::SETGT: return A64CC::GT; + case ISD::SETGE: return A64CC::GE; + case ISD::SETLT: return A64CC::LT; + case ISD::SETLE: return A64CC::LE; + case ISD::SETNE: return A64CC::NE; + case ISD::SETUGT: return A64CC::HI; + case ISD::SETUGE: return A64CC::HS; + case ISD::SETULT: return A64CC::LO; + case ISD::SETULE: return A64CC::LS; + default: llvm_unreachable("Unexpected condition code"); + } +} + +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { + // icmp is implemented using adds/subs immediate, which take an unsigned + // 12-bit immediate, optionally shifted left by 12 bits. + + // Symmetric by using adds/subs + if (Val < 0) + Val = -Val; + + return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; +} + +SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue &A64cc, + SelectionDAG &DAG, DebugLoc &dl) const { + if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { + int64_t C = 0; + EVT VT = RHSC->getValueType(0); + bool knownInvalid = false; + + // I'm not convinced the rest of LLVM handles these edge cases properly, but + // we can at least get it right. + if (isSignedIntSetCC(CC)) { + C = RHSC->getSExtValue(); + } else if (RHSC->getZExtValue() > INT64_MAX) { + // A 64-bit constant not representable by a signed 64-bit integer is far + // too big to fit into a SUBS immediate anyway. + knownInvalid = true; + } else { + C = RHSC->getZExtValue(); + } + + if (!knownInvalid && !isLegalICmpImmediate(C)) { + // Constant does not fit, try adjusting it by one? + switch (CC) { + default: break; + case ISD::SETLT: + case ISD::SETGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + } + } + } + + A64CC::CondCodes CondCode = IntCCToA64CC(CC); + A64cc = DAG.getConstant(CondCode, MVT::i32); + return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); +} + +static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, + A64CC::CondCodes &Alternative) { + A64CC::CondCodes CondCode = A64CC::Invalid; + Alternative = A64CC::Invalid; + + switch (CC) { + default: llvm_unreachable("Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = A64CC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = A64CC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = A64CC::GE; break; + case ISD::SETOLT: CondCode = A64CC::MI; break; + case ISD::SETOLE: CondCode = A64CC::LS; break; + case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; + case ISD::SETO: CondCode = A64CC::VC; break; + case ISD::SETUO: CondCode = A64CC::VS; break; + case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; + case ISD::SETUGT: CondCode = A64CC::HI; break; + case ISD::SETUGE: CondCode = A64CC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = A64CC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = A64CC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = A64CC::NE; break; + } + return CondCode; +} + +SDValue +AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); + const BlockAddress *BA = cast(Op)->getBlockAddress(); + + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "Only small code model supported at the moment"); + + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); +} + + +// (BRCOND chain, val, dest) +SDValue +AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + SDValue TheBit = Op.getOperand(1); + SDValue DestBB = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. + TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, + A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), + DestBB); +} + +// (BR_CC chain, condcode, lhs, rhs, dest) +SDValue +AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue DestBB = Op.getOperand(4); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to runtime calls by a routine which sets + // LHS, RHS and CC appropriately for the rest of this function to continue. 
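The immediate-legality rule and the off-by-one rewrites in getSelectableIntSetCC above are easy to check numerically; an editorial sketch (illustration only):

#include <cassert>
#include <cstdint>

// Mirror of isLegalICmpImmediate: a 12-bit immediate, optionally shifted
// left by 12 bits, with negative values handled by switching adds/subs.
static bool legalICmpImm(int64_t v) {
  if (v < 0)
    v = -v;
  return (v & ~0xfffLL) == 0 || (v & ~0xfff000LL) == 0;
}

int main() {
  assert(legalICmpImm(4095) && legalICmpImm(4096));
  assert(!legalICmpImm(4097));
  // So "x < 4097" becomes "x <= 4096" above: same predicate, legal immediate.
}

Returning to LowerBR_CC, the call that follows performs that f128 softening: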
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, CmpOp, A64cc, DestBB); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, SetCC, A64cc, DestBB); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + A64BR_CC, SetCC, A64cc, DestBB); + + } + + return A64BR_CC; +} + +SDValue +AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + ArgListTy Args; + ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy()); + + Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain); + if (isTailCall) + InChain = TCChain; + + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, false, false, false, false, + 0, getLibcallCallingConv(Call), isTailCall, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op->getDebugLoc()); + std::pair CallInfo = LowerCallTo(CLI); + + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). 
+ return DAG.getRoot(); + + return CallInfo.first; +} + +SDValue +AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, Op.getDebugLoc()); +} + +SDValue +AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make that distinction here. + + // We support the static, small memory model for now. + assert(getTargetMachine().getCodeModel() == CodeModel::Small); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast(Op); + const GlobalValue *GV = GN->getGlobal(); + unsigned Alignment = GV->getAlignment(); + + if (Alignment == 0) { + const PointerType *GVPtrTy = cast(GV->getType()); + if (GVPtrTy->getElementType()->isSized()) + Alignment = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); + else { + // Be conservative if we can't guess, not that it really matters: + // functions and labels aren't valid for loads, and the methods used to + // actually calculate an address work with any alignment. + Alignment = 1; + } + } + + unsigned char HiFixup, LoFixup; + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM); + + if (UseGOT) { + HiFixup = AArch64II::MO_GOT; + LoFixup = AArch64II::MO_GOT_LO12; + Alignment = 8; + } else { + HiFixup = AArch64II::MO_NO_FLAG; + LoFixup = AArch64II::MO_LO12; + } + + // AArch64's small model demands the following sequence: + // ADRP x0, somewhere + // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). + SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + HiFixup), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + LoFixup), + DAG.getConstant(Alignment, MVT::i32)); + + if (UseGOT) { + GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), + GlobalRef); + } + + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalRef; +} + +SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, + SDValue DescAddr, + DebugLoc DL, + SelectionDAG &DAG) const { + EVT PtrVT = getPointerTy(); + + // The function we need to call is simply the first entry in the GOT for this + // descriptor, load it in preparation. 
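The small-code-model sequence shown above (ADRP plus an ADD of the :lo12: part) splits an address into a 4KB page base and a 12-bit offset; an editorial sketch with a hypothetical address (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t sym  = 0x412345;        // hypothetical symbol address
  uint64_t page = sym & ~0xfffULL; // what ADRP materializes (PC-relative)
  uint64_t lo12 = sym & 0xfffULL;  // what the ADD's :lo12: contributes
  assert(page + lo12 == sym);
}

Back in the TLS-descriptor lowering, the function pointer is loaded from the GOT first: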
+ SDValue Func, Chain; + Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + DescAddr); + + // The function takes only one argument: the address of the descriptor itself + // in X0. + SDValue Glue; + Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); + Glue = Chain.getValue(1); + + // Finally, there's a special calling-convention which means that the lookup + // must preserve all registers (except X0, obviously). + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const AArch64RegisterInfo *A64RI + = static_cast(TRI); + const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask(); + + // We're now ready to populate the argument list, as with a normal call: + std::vector Ops; + Ops.push_back(Chain); + Ops.push_back(Func); + Ops.push_back(SymAddr); + Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); + Ops.push_back(DAG.getRegisterMask(Mask)); + Ops.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], Ops.size()); + Glue = Chain.getValue(1); + + // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it + // back to the generic handling code. + return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); +} + +SDValue +AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + const GlobalAddressSDNode *GA = cast(Op); + + TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + SDValue TPOff; + EVT PtrVT = getPointerTy(); + DebugLoc DL = Op.getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + + SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); + + if (Model == TLSModel::InitialExec) { + TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL_LO12), + DAG.getConstant(8, MVT::i32)); + TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + TPOff); + } else if (Model == TLSModel::LocalExec) { + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else if (Model == TLSModel::GeneralDynamic) { + // Accesses used in this sequence go via the TLS descriptor which lives in + // the GOT. Prepare an address we can use to handle this. + SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); + + TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + } else if (Model == TLSModel::LocalDynamic) { + // Local-dynamic accesses proceed in two phases. 
A general-dynamic TLS + // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate + // the beginning of the module's TLS region, followed by a DTPREL offset + // calculation. + + // These accesses will need deduplicating if there's more than one. + AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction() + .getInfo(); + MFI->incNumLocalDynamicTLSAccesses(); + + + // Get the location of _TLS_MODULE_BASE_: + SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); + + ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + + // Get the variable's offset from _TLS_MODULE_BASE_ + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else + llvm_unreachable("Unsupported TLS access model"); + + + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); +} + +SDValue +AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getValueType() != MVT::f128) { + // Legal for everything except f128. + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + + +SDValue +AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + JumpTableSDNode *JT = cast(Op); + DebugLoc dl = JT->getDebugLoc(); + + // When compiling PIC, jump tables get put in the code section so a static + // relocation-style is acceptable for both cases. + return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); +} + +// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) +SDValue +AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue IfTrue = Op.getOperand(2); + SDValue IfFalse = Op.getOperand(3); + ISD::CondCode CC = cast(Op.getOperand(4))->get(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to libcalls, but slot in nicely here + // afterwards. + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. 
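The local-dynamic lowering just completed composes the final address from one module-wide descriptor call plus a per-variable DTPREL offset; an editorial sketch with made-up values (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t moduleBase = 0x7f0000001000ULL; // TLSDESC(_TLS_MODULE_BASE_)
  uint64_t dtprel     = 0x30;              // MOVZ/MOVK-built variable offset
  assert(moduleBase + dtprel == 0x7f0000001030ULL);
  // Every local-dynamic variable reuses moduleBase, which is why the
  // descriptor calls are counted for later deduplication.
}

Back in LowerSELECT_CC, the integer comparison is formed as before: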
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + CmpOp, IfTrue, IfFalse, A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SetCC, IfTrue, IfFalse, A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SetCC, IfTrue, A64SELECT_CC, A64cc); + + } + + return A64SELECT_CC; +} + +// (SELECT testbit, iftrue, iffalse) +SDValue +AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue TheBit = Op.getOperand(0); + SDValue IfTrue = Op.getOperand(1); + SDValue IfFalse = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. + TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + A64CMP, IfTrue, IfFalse, + DAG.getConstant(A64CC::NE, MVT::i32)); +} + +// (SETCC lhs, rhs, condcode) +SDValue +AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + EVT VT = Op.getValueType(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS + // for the rest of the function (some i32 or i64 values). + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, use it. + if (RHS.getNode() == 0) { + assert(LHS.getValueType() == Op.getValueType() && + "Unexpected setcc expansion!"); + return LHS; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), + A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. 
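How the second test combines with the first is easiest to see as two nested selects; an editorial sketch for SETUEQ, which maps to EQ with VS as the alternative (illustration only):

#include <cassert>

static int selectUEQ(bool eq, bool unordered, int ifTrue, int ifFalse) {
  int inner = eq ? ifTrue : ifFalse; // SELECT_CC on the primary flag (EQ)
  return unordered ? ifTrue : inner; // second SELECT_CC on the alternative (VS)
}

int main() {
  assert(selectUEQ(true, false, 1, 0) == 1);  // ordered and equal
  assert(selectUEQ(false, true, 1, 0) == 1);  // unordered
  assert(selectUEQ(false, false, 1, 0) == 0); // ordered, not equal
}

The same two-select chaining appears in the lowering that follows: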
+  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+  CondCode = FPCCToA64CC(CC, Alternative);
+  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+                              DAG.getCondCode(CC));
+  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+                                     CmpOp, DAG.getConstant(1, VT),
+                                     DAG.getConstant(0, VT), A64cc);
+
+  if (Alternative != A64CC::Invalid) {
+    A64cc = DAG.getConstant(Alternative, MVT::i32);
+    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
+                               DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+  }
+
+  return A64SELECT_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+  const Value *DestSV = cast(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast(Op.getOperand(4))->getValue();
+
+  // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
+  // rather than just 8.
+  return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+                       Op.getOperand(1), Op.getOperand(2),
+                       DAG.getConstant(32, MVT::i32), 8, false, false,
+                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+  // The layout of the va_list struct is specified in the AArch64 Procedure
+  // Call Standard, section B.3.
+  MachineFunction &MF = DAG.getMachineFunction();
+  AArch64MachineFunctionInfo *FuncInfo = MF.getInfo();
+  DebugLoc DL = Op.getDebugLoc();
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue VAList = Op.getOperand(1);
+  const Value *SV = cast(Op.getOperand(2))->getValue();
+  SmallVector MemOps;
+
+  // void *__stack at offset 0
+  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
+                                    getPointerTy());
+  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
+                                MachinePointerInfo(SV), false, false, 0));
+
+  // void *__gr_top at offset 8
+  int GPRSize = FuncInfo->getVariadicGPRSize();
+  if (GPRSize > 0) {
+    SDValue GRTop, GRTopAddr;
+
+    GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+                            DAG.getConstant(8, getPointerTy()));
+
+    GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+    GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
+                        DAG.getConstant(GPRSize, getPointerTy()));
+
+    MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
+                                  MachinePointerInfo(SV, 8),
+                                  false, false, 0));
+  }
+
+  // void *__vr_top at offset 16
+  int FPRSize = FuncInfo->getVariadicFPRSize();
+  if (FPRSize > 0) {
+    SDValue VRTop, VRTopAddr;
+    VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+                            DAG.getConstant(16, getPointerTy()));
+
+    VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+    VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
+                        DAG.getConstant(FPRSize, getPointerTy()));
+
+    MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
+                                  MachinePointerInfo(SV, 16),
+                                  false, false, 0));
+  }
+
+  // int __gr_offs at offset 24
+  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+                                   DAG.getConstant(24, getPointerTy()));
+  MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+                                GROffsAddr, MachinePointerInfo(SV, 24),
+                                false, false, 0));
+
+  // int __vr_offs at offset 28
+  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+                                   DAG.getConstant(28, getPointerTy()));
+  MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+                                VROffsAddr, MachinePointerInfo(SV, 28),
+                                false, false, 0));
+
+  return DAG.getNode(ISD::TokenFactor, DL,
MVT::Other, &MemOps[0], + MemOps.size()); +} + +SDValue +AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128); + case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128); + case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128); + case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true); + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false); + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true); + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VACOPY: return LowerVACOPY(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + } + + return SDValue(); +} + +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair which form an SBFX. + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa(N->getOperand(1))) + return SDValue(); + + uint64_t TruncMask = N->getConstantOperandVal(1); + if (!isMask_64(TruncMask)) + return SDValue(); + + uint64_t Width = CountPopulation_64(TruncMask); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SRL) + return SDValue(); + + if (!isa(Shift->getOperand(1))) + return SDValue(); + uint64_t LSB = Shift->getConstantOperandVal(1); + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + +static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, + TargetLowering::DAGCombinerInfo &DCI) { + // An atomic operation followed by an acquiring atomic fence can be reduced to + // an acquiring load. The atomic operation provides a convenient pointer to + // load from. If the original operation was a load anyway we can actually + // combine the two operations into an acquiring load. + SelectionDAG &DAG = DCI.DAG; + SDValue AtomicOp = FenceNode->getOperand(0); + AtomicSDNode *AtomicNode = dyn_cast(AtomicOp); + + // A fence on its own can't be optimised + if (!AtomicNode) + return SDValue(); + + uint64_t FenceOrder = FenceNode->getConstantOperandVal(1); + uint64_t FenceScope = FenceNode->getConstantOperandVal(2); + + if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) + return SDValue(); + + // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so + // the chain we use should be its input, otherwise we'll put our store after + // it so we use its output chain. + SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ? 
+    AtomicNode->getChain() : AtomicOp;
+
+  // We have an acquire fence with a handy atomic operation nearby, so we can
+  // convert the fence into a load-acquire, discarding the result.
+  DebugLoc DL = FenceNode->getDebugLoc();
+  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+                             AtomicNode->getValueType(0),
+                             Chain,                  // Chain
+                             AtomicOp.getOperand(1), // Pointer
+                             AtomicNode->getMemOperand(), Acquire,
+                             static_cast(FenceScope));
+
+  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+  return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI) {
+  // A releasing atomic fence followed by an atomic store can be combined into
+  // a single store operation.
+  SelectionDAG &DAG = DCI.DAG;
+  AtomicSDNode *AtomicNode = cast(N);
+  SDValue FenceOp = AtomicNode->getOperand(0);
+
+  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+    return SDValue();
+
+  uint64_t FenceOrder
+    = cast(FenceOp.getOperand(1))->getZExtValue();
+  uint64_t FenceScope
+    = cast(FenceOp.getOperand(2))->getZExtValue();
+
+  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+    return SDValue();
+
+  DebugLoc DL = AtomicNode->getDebugLoc();
+  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+                       FenceOp.getOperand(0),     // Chain
+                       AtomicNode->getOperand(1), // Pointer
+                       AtomicNode->getOperand(2), // Value
+                       AtomicNode->getMemOperand(), Release,
+                       static_cast(FenceScope));
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed
+/// from a compatible SHL operation (unless they're already low). This function
+/// checks that condition and returns the least-significant bit that's
+/// intended. If the operation isn't a field preparation, -1 is returned.
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+                            SDValue &MaskedVal, uint64_t Mask) {
+  if (!isShiftedMask_64(Mask))
+    return -1;
+
+  // Now we need to alter MaskedVal so that it is an appropriate input for a
+  // BFI instruction. BFI will do a left-shift by LSB before applying the mask
+  // we've spotted, so in general we should pre-emptively "undo" that by making
+  // sure the incoming bits have had a right-shift applied to them.
+  //
+  // This right shift, however, will combine with existing left/right shifts.
+  // In the simplest case of a completely straight bitfield operation, it will
+  // be expected to completely cancel out with an existing SHL. More
+  // complicated cases (e.g. bitfield to bitfield copy) may still need a real
+  // shift before the BFI.
+
+  uint64_t LSB = CountTrailingZeros_64(Mask);
+  int64_t ShiftRightRequired = LSB;
+  if (MaskedVal.getOpcode() == ISD::SHL &&
+      isa(MaskedVal.getOperand(1))) {
+    ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
+    MaskedVal = MaskedVal.getOperand(0);
+  } else if (MaskedVal.getOpcode() == ISD::SRL &&
+             isa(MaskedVal.getOperand(1))) {
+    ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
+    MaskedVal = MaskedVal.getOperand(0);
+  }
+
+  if (ShiftRightRequired > 0)
+    MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
+                            DAG.getConstant(ShiftRightRequired, MVT::i64));
+  else if (ShiftRightRequired < 0) {
+    // We could actually end up with a residual left shift, for example with
+    // "struc.bitfield = val << 1".
+ MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, + DAG.getConstant(-ShiftRightRequired, MVT::i64)); + } + + return LSB; +} + +/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by +/// a mask and an extension. Returns true if a BFI was found and provides +/// information on its surroundings. +static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, + bool &Extended) { + Extended = false; + if (N.getOpcode() == ISD::ZERO_EXTEND) { + Extended = true; + N = N.getOperand(0); + } + + if (N.getOpcode() == ISD::AND && isa(N.getOperand(1))) { + Mask = N->getConstantOperandVal(1); + N = N.getOperand(0); + } else { + // Mask is the whole width. + Mask = (1ULL << N.getValueType().getSizeInBits()) - 1; + } + + if (N.getOpcode() == AArch64ISD::BFI) { + BFI = N; + return true; + } + + return false; +} + +/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which +/// is roughly equivalent to (and (BFI ...), mask). This form is used because it +/// can often be further combined with a larger mask. Ultimately, we want mask +/// to be 2^32-1 or 2^64-1 so the AND can be skipped. +static SDValue tryCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or + // abandon the effort. + SDValue LHS = N->getOperand(0); + if (LHS.getOpcode() != ISD::AND) + return SDValue(); + + uint64_t LHSMask; + if (isa(LHS.getOperand(1))) + LHSMask = LHS->getConstantOperandVal(1); + else + return SDValue(); + + // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask + // is or abandon the effort. + SDValue RHS = N->getOperand(1); + if (RHS.getOpcode() != ISD::AND) + return SDValue(); + + uint64_t RHSMask; + if (isa(RHS.getOperand(1))) + RHSMask = RHS->getConstantOperandVal(1); + else + return SDValue(); + + // Can't do anything if the masks are incompatible. + if (LHSMask & RHSMask) + return SDValue(); + + // Now we need one of the masks to be a contiguous field. Without loss of + // generality that should be the RHS one. + SDValue Bitfield = LHS.getOperand(0); + if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) { + // We know that LHS is a candidate new value, and RHS isn't already a better + // one. + std::swap(LHS, RHS); + std::swap(LHSMask, RHSMask); + } + + // We've done our best to put the right operands in the right places, all we + // can do now is check whether a BFI exists. + Bitfield = RHS.getOperand(0); + int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask); + if (LSB == -1) + return SDValue(); + + uint32_t Width = CountPopulation_64(RHSMask); + assert(Width && "Expected non-zero bitfield width"); + + SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, + LHS.getOperand(0), Bitfield, + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(Width, MVT::i64)); + + // Mask is trivial + if ((LHSMask | RHSMask) == (1ULL << VT.getSizeInBits()) - 1) + return BFI; + + return DAG.getNode(ISD::AND, DL, VT, BFI, + DAG.getConstant(LHSMask | RHSMask, VT)); +} + +/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its +/// original input. This is surprisingly common because SROA splits things up +/// into i8 chunks, so the originally detected MaskedBFI may actually only act +/// on the low (say) byte of a word. 
This is then ORed into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
+static SDValue tryCombineToLargerBFI(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const AArch64Subtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc DL = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // First job is to hunt for a MaskedBFI on either the left or right. Swap
+  // operands if it's actually on the right.
+  SDValue BFI;
+  SDValue PossExtraMask;
+  uint64_t ExistingMask = 0;
+  bool Extended = false;
+  if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+    PossExtraMask = N->getOperand(1);
+  else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+    PossExtraMask = N->getOperand(0);
+  else
+    return SDValue();
+
+  // We can only combine a BFI with another compatible mask.
+  if (PossExtraMask.getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+    return SDValue();
+
+  uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+  // Masks must be compatible.
+  if (ExtraMask & ExistingMask)
+    return SDValue();
+
+  SDValue OldBFIVal = BFI.getOperand(0);
+  SDValue NewBFIVal = BFI.getOperand(1);
+  if (Extended) {
+    // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should
+    // be 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI
+    // arguments need to be made compatible.
+    assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+           && "Invalid types for BFI");
+    OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+    NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+  }
+
+  // We need the MaskedBFI to be combined with a mask of the *same* value.
+  if (PossExtraMask.getOperand(0) != OldBFIVal)
+    return SDValue();
+
+  BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+                    OldBFIVal, NewBFIVal,
+                    BFI.getOperand(2), BFI.getOperand(3));
+
+  // If the masking is trivial, we don't need to create it.
+  if ((ExtraMask | ExistingMask) == (1ULL << VT.getSizeInBits()) - 1)
+    return BFI;
+
+  return DAG.getNode(ISD::AND, DL, VT, BFI,
+                     DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+                         bool &FromHi) {
+  if (N.getOpcode() == ISD::SHL)
+    FromHi = false;
+  else if (N.getOpcode() == ISD::SRL)
+    FromHi = true;
+  else
+    return false;
+
+  if (!isa<ConstantSDNode>(N.getOperand(1)))
+    return false;
+
+  ShiftAmount = N->getConstantOperandVal(1);
+  Src = N->getOperand(0);
+  return true;
+}
+
+/// An EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. Can't quite be done in TableGen because the two immediates aren't
+/// independent.
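+///
+/// For example, with i64 values,
+///   (or (shl V1, #48), (srl V2, #16))
+/// becomes EXTR V1, V2, #16: the low 16 bits of V1 end up above bits 63-16
+/// of V2.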
+static SDValue tryCombineToEXTR(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue LHS; + uint32_t ShiftLHS = 0; + bool LHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) + return SDValue(); + + SDValue RHS; + uint32_t ShiftRHS = 0; + bool RHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) + return SDValue(); + + // If they're both trying to come from the high part of the register, they're + // not really an EXTR. + if (LHSFromHi == RHSFromHi) + return SDValue(); + + if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) + return SDValue(); + + if (LHSFromHi) { + std::swap(LHS, RHS); + std::swap(ShiftLHS, ShiftRHS); + } + + return DAG.getNode(AArch64ISD::EXTR, DL, VT, + LHS, RHS, + DAG.getConstant(ShiftRHS, MVT::i64)); +} + +/// Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + // Attempt to recognise bitfield-insert operations. + SDValue Res = tryCombineToBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + // Attempt to combine an existing MaskedBFI operation into one with a larger + // mask. + Res = tryCombineToLargerBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + Res = tryCombineToEXTR(N, DCI); + if (Res.getNode()) + return Res; + + return SDValue(); +} + +/// Target-specific dag combine xforms for ISD::SRA +static SDValue PerformSRACombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair which form an SBFX. 
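+  // A worked example (i32): (sra (shl X, 24), 28) keeps bits 7-4 of X and
+  // sign-extends them, so ExtraSignBits == 28, BitsOnLeft == 24, Width == 4,
+  // LSB == 4, and we form (SBFX X, #4, #7).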
+ + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa(N->getOperand(1))) + return SDValue(); + + uint64_t ExtraSignBits = N->getConstantOperandVal(1); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SHL) + return SDValue(); + + if (!isa(Shift->getOperand(1))) + return SDValue(); + + uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); + uint64_t Width = VT.getSizeInBits() - ExtraSignBits; + uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + + +SDValue +AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); + case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); + case ISD::SRA: return PerformSRACombine(N, DCI); + } + return SDValue(); +} + +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'w': // An FP/SIMD vector register + return C_RegisterClass; + case 'I': // Constant that can be used with an ADD instruction + case 'J': // Constant that can be used with a SUB instruction + case 'K': // Constant that can be used with a 32-bit logical instruction + case 'L': // Constant that can be used with a 64-bit logical instruction + case 'M': // Constant that can be used as a 32-bit MOV immediate + case 'N': // Constant that can be used as a 64-bit MOV immediate + case 'Y': // Floating point constant zero + case 'Z': // Integer constant zero + return C_Other; + case 'Q': // A memory reference with base register and no offset + return C_Memory; + case 'S': // A symbolic address + return C_Other; + } + } + + // FIXME: Ump, Utf, Usa, Ush + // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be + // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be + // Usa: An absolute symbolic address + // Ush: The high part (bits 32:12) of a pc-relative symbolic address + assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa" + && Constraint != "Ush" && "Unimplemented constraints"); + + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const { + + llvm_unreachable("Constraint weight unimplemented"); +} + +void +AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + // Only length 1 constraints are C_Other. + if (Constraint.size() != 1) return; + + // Only C_Other constraints get lowered like this. That means constants for us + // so return early if there's no hope the constraint can be lowered. 
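+  // For example, given the 'I' constraint, an operand like (i32 4095) is
+  // turned into a target constant below, while (i32 4096) makes us bail out
+  // without lowering the operand at all.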
+ + switch(Constraint[0]) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'Z': { + ConstantSDNode *C = dyn_cast(Op); + if (!C) + return; + + uint64_t CVal = C->getZExtValue(); + uint32_t Bits; + + switch (Constraint[0]) { + default: + // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J' + // is a peculiarly useless SUB constraint. + llvm_unreachable("Unimplemented C_Other constraint"); + case 'I': + if (CVal <= 0xfff) + break; + return; + case 'K': + if (A64Imms::isLogicalImm(32, CVal, Bits)) + break; + return; + case 'L': + if (A64Imms::isLogicalImm(64, CVal, Bits)) + break; + return; + case 'Z': + if (CVal == 0) + break; + return; + } + + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + case 'S': { + // An absolute symbolic address or label reference. + if (const GlobalAddressSDNode *GA = dyn_cast(Op)) { + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), + GA->getValueType(0)); + } else if (const BlockAddressSDNode *BA = dyn_cast(Op)) { + Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), + BA->getValueType(0)); + } else if (const ExternalSymbolSDNode *ES + = dyn_cast(Op)) { + Result = DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0)); + } else + return; + break; + } + case 'Y': + if (const ConstantFPSDNode *CFP = dyn_cast(Op)) { + if (CFP->isExactlyValue(0.0)) { + Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0)); + break; + } + } + return; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + // It's an unknown constraint for us. Let generic code have a go. + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +std::pair +AArch64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &AArch64::GPR32RegClass); + else if (VT == MVT::i64) + return std::make_pair(0U, &AArch64::GPR64RegClass); + break; + case 'w': + if (VT == MVT::f16) + return std::make_pair(0U, &AArch64::FPR16RegClass); + else if (VT == MVT::f32) + return std::make_pair(0U, &AArch64::FPR32RegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &AArch64::FPR64RegClass); + else if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &AArch64::VPR64RegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &AArch64::FPR128RegClass); + else if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &AArch64::VPR128RegClass); + break; + } + } + + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h new file mode 100644 index 0000000..66985c1 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -0,0 +1,247 @@ +//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+  enum NodeType {
+    // Start the numbering from where ISD NodeType finishes.
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+    // This is a conditional branch which also notes the flag needed
+    // (eq/sgt/...). A64 puts this information on the branches rather than
+    // compares as LLVM does.
+    BR_CC,
+
+    // A node to be selected to an actual call operation: either BL or BLR in
+    // the absence of tail calls.
+    Call,
+
+    // Indicates a floating-point immediate which fits into the format
+    // required by the FMOV instructions. First (and only) operand is the
+    // 8-bit encoded value of that immediate.
+    FPMOV,
+
+    // Corresponds directly to an EXTR instruction. Operands are an LHS, an
+    // RHS and an LSB.
+    EXTR,
+
+    // Wraps a load from the GOT, which should always be performed with a
+    // 64-bit load instruction. This prevents the DAG combiner folding a
+    // truncate to form a smaller memory access.
+    GOTLoad,
+
+    // Performs a bitfield insert. Arguments are: the value being inserted
+    // into; the value being inserted; least significant bit changed; width
+    // of the field.
+    BFI,
+
+    // Simply a convenient node inserted during ISelLowering to represent
+    // procedure return. Will almost certainly be selected to "RET".
+    Ret,
+
+    /// Extracts a field of contiguous bits from the source and sign extends
+    /// them into a single register. Arguments are: source; immr; imms. Note
+    /// these are pre-encoded since DAG matching can't cope with combining
+    /// LSB and Width into these values itself.
+    SBFX,
+
+    /// This is an A64-ification of the standard LLVM SELECT_CC operation.
+    /// The main difference is that it only has the values and an A64
+    /// condition, which will be produced by a setcc instruction.
+    SELECT_CC,
+
+    /// This serves most of the functions of the LLVM SETCC instruction, and
+    /// exists for two purposes. First, it prevents optimisations from
+    /// fiddling with the compare after we've moved the CondCode information
+    /// onto the SELECT_CC or BR_CC instructions. Second, it gives a legal
+    /// instruction for the actual comparison.
+    ///
+    /// It keeps a record of the condition flags asked for because certain
+    /// instructions are only valid for a subset of condition codes.
+    SETCC,
+
+    // Designates a node which is a tail call: both a call and a return
+    // instruction as far as selection is concerned. It should be selected to
+    // an unconditional branch. Has the usual plethora of call operands, but:
+    // 1st is callee, 2nd is stack adjustment required immediately before
+    // branch.
+    TC_RETURN,
+
+    // Designates a call used to support the TLS descriptor ABI. The call
+    // itself will be indirect ("BLR xN") but a relocation-specifier
+    // (".tlsdesccall var") must be attached somehow during code generation.
+    // It takes two operands: the callee and the symbol to be relocated
+    // against.
+    TLSDESCCALL,
+
+    // Leaf node which will be lowered to an appropriate MRS to obtain the
+    // thread pointer: TPIDR_EL0.
+    THREAD_POINTER,
+
+    /// Extracts a field of contiguous bits from the source and zero extends
+    /// them into a single register. Arguments are: source; immr; imms.
Note + /// these are pre-encoded since DAG matching can't cope with combining LSB + /// and Width into these values itself. + UBFX, + + // Wraps an address which the ISelLowering phase has decided should be + // created using the small absolute memory model: i.e. adrp/add or + // adrp/mem-op. This exists to prevent bare TargetAddresses which may never + // get selected. + WrapperSmall + }; +} + + +class AArch64Subtarget; +class AArch64TargetMachine; + +class AArch64TargetLowering : public TargetLowering { +public: + explicit AArch64TargetLowering(AArch64TargetMachine &TM); + + const char *getTargetNodeName(unsigned Opcode) const; + + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const; + + SDValue LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const; + + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + SelectionDAG& DAG) const; + + /// Finds the incoming stack arguments which overlap the given fixed stack + /// object and incorporates their load into the current chain. This prevents an + /// upcoming store from clobbering the stack argument before it's used. 
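+  /// (In particular this matters for tail calls, where outgoing arguments
+  /// can be stored over the caller's own incoming argument area.)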
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, + MachineFrameInfo *MFI, int ClobberedFI) const; + + EVT getSetCCResultType(EVT VT) const; + + bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; + + bool IsTailCallConvention(CallingConv::ID CallCC) const; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + bool isLegalICmpImmediate(int64_t Val) const; + SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + MachineBasicBlock * + emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, + unsigned Size, unsigned Opcode) const; + + MachineBasicBlock * + emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, unsigned CmpOp, + A64CC::CondCodes Cond) const; + MachineBasicBlock * + emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size) const; + + MachineBasicBlock * + EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, + SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. 
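+  /// For example, (fmuladd a, b, c) can then become a single FMADD
+  /// instruction rather than an FMUL followed by an FADD.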
+  virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+  ConstraintType getConstraintType(const std::string &Constraint) const;
+
+  ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+                                                  const char *Constraint) const;
+  void LowerAsmOperandForConstraint(SDValue Op,
+                                    std::string &Constraint,
+                                    std::vector<SDValue> &Ops,
+                                    SelectionDAG &DAG) const;
+
+  std::pair<unsigned, const TargetRegisterClass*>
+  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+  const AArch64Subtarget *Subtarget;
+  const TargetRegisterInfo *RegInfo;
+  const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 0000000..ce66396
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,1011 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// A64 Instruction Format Definitions.
+//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+              InstrItinClass itin>
+  : Instruction
+{
+  // All A64 instructions are 32-bit. This field will be filled in
+  // gradually going down the hierarchy.
+  field bits<32> Inst;
+
+  field bits<32> Unpredictable = 0;
+  // SoftFail is the generic name for this field, but we alias it so
+  // as to make it more obvious what it means in ARM-land.
+  field bits<32> SoftFail = Unpredictable;
+
+  // LLVM-level model of the AArch64/A64 distinction.
+  let Namespace = "AArch64";
+  let DecoderNamespace = "A64";
+  let Size = 4;
+
+  // Set the templated fields
+  let OutOperandList = outs;
+  let InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = patterns;
+  let Itinerary = itin;
+}
+
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction
+{
+  let Namespace = "AArch64";
+
+  let OutOperandList = outs;
+  let InOperandList = ins;
+  let Pattern = patterns;
+  let isCodeGenOnly = 1;
+  let isPseudo = 1;
+}
+
+// Represents a pseudo-instruction that, for whatever reason, stands for a
+// single A64 instruction: the eventual result will be a 32-bit real
+// instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+  : PseudoInst<outs, ins, patterns>
+{
+  let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give
+// the expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+  : A64PseudoInst<outs, ins, patterns>,
+    PseudoInstExpansion<Result>;
+
+
+// First, some common cross-hierarchy register formats.
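+// (Rd/Rt live in bits 4-0, Rn in bits 9-5, Rm in bits 20-16 and Rt2 in bits
+// 14-10; each class below adds one field to its parent.)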
+ +class A64InstRd patterns, InstrItinClass itin> + : A64Inst +{ + bits<5> Rd; + + let Inst{4-0} = Rd; +} + +class A64InstRt patterns, InstrItinClass itin> + : A64Inst +{ + bits<5> Rt; + + let Inst{4-0} = Rt; +} + + +class A64InstRdn patterns, InstrItinClass itin> + : A64InstRd +{ + // Inherit rdt + bits<5> Rn; + + let Inst{9-5} = Rn; +} + +class A64InstRtn patterns, InstrItinClass itin> + : A64InstRt +{ + // Inherit rdt + bits<5> Rn; + + let Inst{9-5} = Rn; +} + +// Instructions taking Rt,Rt2,Rn +class A64InstRtt2n patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<5> Rt2; + + let Inst{14-10} = Rt2; +} + +class A64InstRdnm patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<5> Rm; + + let Inst{20-16} = Rm; +} + +//===----------------------------------------------------------------------===// +// +// Actual A64 Instruction Formats +// + +// Format for Add-subtract (extended register) instructions. +class A64I_addsubext opt, bits<3> option, + dag outs, dag ins, string asmstr, list patterns, + InstrItinClass itin> + : A64InstRdnm +{ + bits<3> Imm3; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b01011; + let Inst{23-22} = opt; + let Inst{21} = 0b1; + // Rm inherited in 20-16 + let Inst{15-13} = option; + let Inst{12-10} = Imm3; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Add-subtract (immediate) instructions. +class A64I_addsubimm shift, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<12> Imm12; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b10001; + let Inst{23-22} = shift; + let Inst{21-10} = Imm12; +} + +// Format for Add-subtract (shifted register) instructions. +class A64I_addsubshift shift, + dag outs, dag ins, string asmstr, list patterns, + InstrItinClass itin> + : A64InstRdnm +{ + bits<6> Imm6; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b01011; + let Inst{23-22} = shift; + let Inst{21} = 0b0; + // Rm inherited in 20-16 + let Inst{15-10} = Imm6; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Add-subtract (with carry) instructions. +class A64I_addsubcarry opcode2, + dag outs, dag ins, string asmstr, list patterns, + InstrItinClass itin> + : A64InstRdnm +{ + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-21} = 0b11010000; + // Rm inherited in 20-16 + let Inst{15-10} = opcode2; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + + +// Format for Bitfield instructions +class A64I_bitfield opc, bit n, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<6> ImmR; + bits<6> ImmS; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{22} = n; + let Inst{21-16} = ImmR; + let Inst{15-10} = ImmS; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for compare and branch (immediate) instructions. +class A64I_cmpbr patterns, InstrItinClass itin> + : A64InstRt +{ + bits<19> Label; + + let Inst{31} = sf; + let Inst{30-25} = 0b011010; + let Inst{24} = op; + let Inst{23-5} = Label; + // Inherit Rt in 4-0 +} + +// Format for conditional branch (immediate) instructions. +class A64I_condbr patterns, InstrItinClass itin> + : A64Inst +{ + bits<19> Label; + bits<4> Cond; + + let Inst{31-25} = 0b0101010; + let Inst{24} = o1; + let Inst{23-5} = Label; + let Inst{4} = o0; + let Inst{3-0} = Cond; +} + +// Format for conditional compare (immediate) instructions. 
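+// (These are CCMN/CCMP with an immediate, e.g. "ccmp x0, #31, #0, eq": if the
+// condition holds, the comparison sets NZCV as usual; otherwise NZCV is set
+// directly to the NZCVImm value.)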
+class A64I_condcmpimm patterns, InstrItinClass itin> + : A64Inst +{ + bits<5> Rn; + bits<5> UImm5; + bits<4> NZCVImm; + bits<4> Cond; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010010; + let Inst{20-16} = UImm5; + let Inst{15-12} = Cond; + let Inst{11} = 0b1; + let Inst{10} = o2; + let Inst{9-5} = Rn; + let Inst{4} = o3; + let Inst{3-0} = NZCVImm; +} + +// Format for conditional compare (register) instructions. +class A64I_condcmpreg patterns, InstrItinClass itin> + : A64Inst +{ + bits<5> Rn; + bits<5> Rm; + bits<4> NZCVImm; + bits<4> Cond; + + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010010; + let Inst{20-16} = Rm; + let Inst{15-12} = Cond; + let Inst{11} = 0b0; + let Inst{10} = o2; + let Inst{9-5} = Rn; + let Inst{4} = o3; + let Inst{3-0} = NZCVImm; +} + +// Format for conditional select instructions. +class A64I_condsel op2, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + bits<4> Cond; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010100; + // Inherit Rm in 20-16 + let Inst{15-12} = Cond; + let Inst{11-10} = op2; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for data processing (1 source) instructions +class A64I_dp_1src opcode2, bits<6> opcode, + string asmstr, dag outs, dag ins, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + let Inst{31} = sf; + let Inst{30} = 0b1; + let Inst{29} = S; + let Inst{28-21} = 0b11010110; + let Inst{20-16} = opcode2; + let Inst{15-10} = opcode; +} + +// Format for data processing (2 source) instructions +class A64I_dp_2src opcode, bit S, + string asmstr, dag outs, dag ins, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = S; + let Inst{28-21} = 0b11010110; + let Inst{15-10} = opcode; +} + +// Format for data-processing (3 source) instructions + +class A64I_dp3 opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + bits<5> Ra; + + let Inst{31} = sf; + let Inst{30-29} = opcode{5-4}; + let Inst{28-24} = 0b11011; + let Inst{23-21} = opcode{3-1}; + // Inherits Rm in 20-16 + let Inst{15} = opcode{0}; + let Inst{14-10} = Ra; + // Inherits Rn in 9-5 + // Inherits Rd in 4-0 +} + +// Format for exception generation instructions +class A64I_exception opc, bits<3> op2, bits<2> ll, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64Inst +{ + bits<16> UImm16; + + let Inst{31-24} = 0b11010100; + let Inst{23-21} = opc; + let Inst{20-5} = UImm16; + let Inst{4-2} = op2; + let Inst{1-0} = ll; +} + +// Format for extract (immediate) instructions +class A64I_extract op, bit n, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + bits<6> LSB; + + let Inst{31} = sf; + let Inst{30-29} = op{2-1}; + let Inst{28-23} = 0b100111; + let Inst{22} = n; + let Inst{21} = op{0}; + // Inherits Rm in bits 20-16 + let Inst{15-10} = LSB; + // Inherits Rn in 9-5 + // Inherits Rd in 4-0 +} + +// Format for floating-point compare instructions. 
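+// (These are FCMP/FCMPE, e.g. "fcmp s0, s1" or "fcmp d0, #0.0"; the opcode2
+// field distinguishes the register form from the compare-against-zero form.)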
+class A64I_fpcmp type, bits<2> op, bits<5> opcode2, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64Inst +{ + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-14} = op; + let Inst{13-10} = 0b1000; + let Inst{9-5} = Rn; + let Inst{4-0} = opcode2; +} + +// Format for floating-point conditional compare instructions. +class A64I_fpccmp type, bit op, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<5> Rn; + bits<5> Rm; + bits<4> NZCVImm; + bits<4> Cond; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-12} = Cond; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4} = op; + let Inst{3-0} = NZCVImm; +} + +// Format for floating-point conditional select instructions. +class A64I_fpcondsel type, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + bits<4> Cond; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = Cond; + let Inst{11-10} = 0b11; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format for floating-point data-processing (1 source) instructions. +class A64I_fpdp1 type, bits<6> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-15} = opcode; + let Inst{14-10} = 0b10000; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point data-processing (2 sources) instructions. +class A64I_fpdp2 type, bits<4> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point data-processing (3 sources) instructions. +class A64I_fpdp3 type, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + bits<5> Ra; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11111; + let Inst{23-22} = type; + let Inst{21} = o1; + // Inherit Rm in 20-16 + let Inst{15} = o0; + let Inst{14-10} = Ra; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point <-> fixed-point conversion instructions. +class A64I_fpfixed type, bits<2> mode, bits<3> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<6> Scale; + + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b0; + let Inst{20-19} = mode; + let Inst{18-16} = opcode; + let Inst{15-10} = Scale; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point <-> integer conversion instructions. 
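+// (E.g. "scvtf d0, w1" or "fcvtzs w0, s1". Unlike the fixed-point format
+// above there is no Scale field: bits 15-10 are fixed at zero.)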
+class A64I_fpint type, bits<2> rmode, bits<3> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format for floating-point immediate instructions. +class A64I_fpimm type, bits<5> imm5, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRd +{ + bits<8> Imm8; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-13} = Imm8; + let Inst{12-10} = 0b100; + let Inst{9-5} = imm5; + // Inherit Rd in 4-0 +} + +// Format for load-register (literal) instructions. +class A64I_LDRlit opc, bit v, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRt +{ + bits<19> Imm19; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-5} = Imm19; + // Inherit Rt in 4-0 +} + +// Format for load-store exclusive instructions. +class A64I_LDSTex_tn size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + let Inst{31-30} = size; + let Inst{29-24} = 0b001000; + let Inst{23} = o2; + let Inst{22} = L; + let Inst{21} = o1; + let Inst{15} = o0; +} + +class A64I_LDSTex_tt2n size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin>: + A64I_LDSTex_tn{ + bits<5> Rt2; + let Inst{14-10} = Rt2; +} + +class A64I_LDSTex_stn size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin>: + A64I_LDSTex_tn{ + bits<5> Rs; + let Inst{20-16} = Rs; +} + +class A64I_LDSTex_stt2n size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin>: + A64I_LDSTex_stn{ + bits<5> Rt2; + let Inst{14-10} = Rt2; +} + +// Format for load-store register (immediate post-indexed) instructions +class A64I_LSpostind size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<9> SImm9; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b01; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store register (immediate pre-indexed) instructions +class A64I_LSpreind size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<9> SImm9; + + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b11; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store register (unprivileged) instructions +class A64I_LSunpriv size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<9> SImm9; + + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b10; + // 
Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store (unscaled immediate) instructions. +class A64I_LSunalimm size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<9> SImm9; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b00; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + + +// Format for load-store (unsigned immediate) instructions. +class A64I_LSunsigimm size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<12> UImm12; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b01; + let Inst{23-22} = opc; + let Inst{21-10} = UImm12; +} + +// Format for load-store register (register offset) instructions. +class A64I_LSregoff size, bit v, bits<2> opc, bit optionlo, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<5> Rm; + + // Complex operand selection needed for these instructions, so they + // need an "addr" field for encoding/decoding to be generated. + bits<3> Ext; + // OptionHi = Ext{2-1} + // S = Ext{0} + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-14} = Ext{2-1}; + let Inst{13} = optionlo; + let Inst{12} = Ext{0}; + let Inst{11-10} = 0b10; + // Inherits Rn in 9-5 + // Inherits Rt in 4-0 + + let AddedComplexity = 50; +} + +// Format for Load-store register pair (offset) instructions +class A64I_LSPoffset opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtt2n +{ + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b010; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store register pair (post-indexed) instructions +class A64I_LSPpostind opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtt2n +{ + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b001; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store register pair (pre-indexed) instructions +class A64I_LSPpreind opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtt2n +{ + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b011; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store non-temporal register pair (offset) instructions +class A64I_LSPnontemp opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtt2n +{ + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b000; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Logical (immediate) instructions +class A64I_logicalimm opc, + dag 
outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + bit N; + bits<6> ImmR; + bits<6> ImmS; + + // N, ImmR and ImmS have no separate existence in any assembly syntax (or for + // selection), so we'll combine them into a single field here. + bits<13> Imm; + // N = Imm{12}; + // ImmR = Imm{11-6}; + // ImmS = Imm{5-0}; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100100; + let Inst{22} = Imm{12}; + let Inst{21-16} = Imm{11-6}; + let Inst{15-10} = Imm{5-0}; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Logical (shifted register) instructions +class A64I_logicalshift opc, bits<2> shift, bit N, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + bits<6> Imm6; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-24} = 0b01010; + let Inst{23-22} = shift; + let Inst{21} = N; + // Rm inherited + let Inst{15-10} = Imm6; + // Rn inherited + // Rd inherited +} + +// Format for Move wide (immediate) +class A64I_movw opc, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRd +{ + bits<16> UImm16; + bits<2> Shift; // Called "hw" officially + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = Shift; + let Inst{20-5} = UImm16; + // Inherits Rd in 4-0 +} + +// Format for PC-relative addressing instructions, ADR and ADRP. +class A64I_PCADR patterns, InstrItinClass itin> + : A64InstRd +{ + bits<21> Label; + + let Inst{31} = op; + let Inst{30-29} = Label{1-0}; + let Inst{28-24} = 0b10000; + let Inst{23-5} = Label{20-2}; +} + +// Format for system instructions +class A64I_system patterns, InstrItinClass itin> + : A64Inst +{ + bits<2> Op0; + bits<3> Op1; + bits<4> CRn; + bits<4> CRm; + bits<3> Op2; + bits<5> Rt; + + let Inst{31-22} = 0b1101010100; + let Inst{21} = l; + let Inst{20-19} = Op0; + let Inst{18-16} = Op1; + let Inst{15-12} = CRn; + let Inst{11-8} = CRm; + let Inst{7-5} = Op2; + let Inst{4-0} = Rt; + + // These instructions can do horrible things. + let hasSideEffects = 1; +} + +// Format for unconditional branch (immediate) instructions +class A64I_Bimm patterns, InstrItinClass itin> + : A64Inst +{ + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. + bits<26> Label; + + let Inst{31} = op; + let Inst{30-26} = 0b00101; + let Inst{25-0} = Label; +} + +// Format for Test & branch (immediate) instructions +class A64I_TBimm patterns, InstrItinClass itin> + : A64InstRt +{ + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. + bits<6> Imm; + bits<14> Label; + + let Inst{31} = Imm{5}; + let Inst{30-25} = 0b011011; + let Inst{24} = op; + let Inst{23-19} = Imm{4-0}; + let Inst{18-5} = Label; + // Inherit Rt in 4-0 +} + +// Format for Unconditional branch (register) instructions, including +// RET. Shares no fields with instructions further up the hierarchy +// so top-level. +class A64I_Breg opc, bits<5> op2, bits<6> op3, bits<5> op4, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64Inst +{ + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. 
+ bits<5> Rn; + + let Inst{31-25} = 0b1101011; + let Inst{24-21} = opc; + let Inst{20-16} = op2; + let Inst{15-10} = op3; + let Inst{9-5} = Rn; + let Inst{4-0} = op4; +} + diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp new file mode 100644 index 0000000..967960c --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -0,0 +1,805 @@ +//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#include + +#define GET_INSTRINFO_CTOR +#include "AArch64GenInstrInfo.inc" + +using namespace llvm; + +AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + RI(*this, STI), Subtarget(STI) {} + +void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0; + unsigned ZeroReg = 0; + if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { + // E.g. ADD xDst, xsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { + // E.g. ADD wDST, wsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + // E.g. MSR NZCV, xDST + BuildMI(MBB, I, DL, get(AArch64::MSRix)) + .addImm(A64SysReg::NZCV) + .addReg(SrcReg); + } else if (SrcReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(DestReg)); + // E.g. 
MRS xDST, NZCV + BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) + .addImm(A64SysReg::NZCV); + } else if (AArch64::GPR64RegClass.contains(DestReg)) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + Opc = AArch64::ORRxxx_lsl; + ZeroReg = AArch64::XZR; + } else if (AArch64::GPR32RegClass.contains(DestReg)) { + assert(AArch64::GPR32RegClass.contains(SrcReg)); + Opc = AArch64::ORRwww_lsl; + ZeroReg = AArch64::WZR; + } else if (AArch64::FPR32RegClass.contains(DestReg)) { + assert(AArch64::FPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR64RegClass.contains(DestReg)) { + assert(AArch64::FPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR128RegClass.contains(DestReg)) { + assert(AArch64::FPR128RegClass.contains(SrcReg)); + + // FIXME: there's no good way to do this, at least without NEON: + // + There's no single move instruction for q-registers + // + We can't create a spill slot and use normal STR/LDR because stack + // allocation has already happened + // + We can't go via X-registers with FMOV because register allocation has + // already happened. + // This may not be efficient, but at least it works. + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) + .addReg(SrcReg) + .addReg(AArch64::XSP) + .addImm(0x1ff & -16); + + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) + .addReg(AArch64::XSP, RegState::Define) + .addReg(AArch64::XSP) + .addImm(16); + return; + } else { + llvm_unreachable("Unknown register class in copyPhysReg"); + } + + // E.g. ORR xDst, xzr, xSrc, lsl #0 + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(ZeroReg) + .addReg(SrcReg) + .addImm(0); +} + +MachineInstr * +AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; +} + +/// Does the Opcode represent a conditional branch that we can remove and re-add +/// at the end of a basic block? +static bool isCondBranch(unsigned Opc) { + return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || + Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || + Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || + Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; +} + +/// Takes apart a given conditional branch MachineInstr (see isCondBranch), +/// setting TBB to the destination basic block and populating the Cond vector +/// with data necessary to recreate the conditional branch at a later +/// date. First element will be the opcode, and subsequent ones define the +/// conditions being branched on in an instruction-specific manner. +static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, + SmallVectorImpl &Cond) { + switch(I->getOpcode()) { + case AArch64::Bcc: + case AArch64::CBZw: + case AArch64::CBZx: + case AArch64::CBNZw: + case AArch64::CBNZx: + // These instructions just have one predicate operand in position 0 (either + // a condition code or a register being compared). 
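+    // E.g. for "cbz x3, <bb>", Cond becomes {CBZx, x3} and TBB is <bb>.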
+    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+    Cond.push_back(I->getOperand(0));
+    TBB = I->getOperand(1).getMBB();
+    return;
+  case AArch64::TBZwii:
+  case AArch64::TBZxii:
+  case AArch64::TBNZwii:
+  case AArch64::TBNZxii:
+    // These have two predicate operands: a register and a bit position.
+    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+    Cond.push_back(I->getOperand(0));
+    Cond.push_back(I->getOperand(1));
+    TBB = I->getOperand(2).getMBB();
+    return;
+  default:
+    llvm_unreachable("Unknown conditional branch to classify");
+  }
+}
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                                MachineBasicBlock *&FBB,
+                                SmallVectorImpl<MachineOperand> &Cond,
+                                bool AllowModify) const {
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+  --I;
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  unsigned LastOpc = LastInst->getOpcode();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (LastOpc == AArch64::Bimm) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (isCondBranch(LastOpc)) {
+      classifyCondBranch(LastInst, TBB, Cond);
+      return false;
+    }
+    return true;  // Can't handle indirect branch.
+  }
+
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = I;
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If AllowModify is true and the block ends with two or more unconditional
+  // branches, delete all but the first unconditional branch.
+  if (AllowModify && LastOpc == AArch64::Bimm) {
+    while (SecondLastOpc == AArch64::Bimm) {
+      LastInst->eraseFromParent();
+      LastInst = SecondLastInst;
+      LastOpc = LastInst->getOpcode();
+      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now; the only terminator left is an unconditional branch.
+        TBB = LastInst->getOperand(0).getMBB();
+        return false;
+      } else {
+        SecondLastInst = I;
+        SecondLastOpc = SecondLastInst->getOpcode();
+      }
+    }
+  }
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+    return true;
+
+  // If the block ends with a B and a Bcc, handle it.
+  if (LastOpc == AArch64::Bimm) {
+    if (SecondLastOpc == AArch64::Bcc) {
+      TBB = SecondLastInst->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+      Cond.push_back(SecondLastInst->getOperand(0));
+      FBB = LastInst->getOperand(0).getMBB();
+      return false;
+    } else if (isCondBranch(SecondLastOpc)) {
+      classifyCondBranch(SecondLastInst, TBB, Cond);
+      FBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+  }
+
+  // If the block ends with two unconditional branches, handle it. The second
+  // one is not executed, so remove it.
+  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+ return true; +} + +bool AArch64InstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + switch (Cond[0].getImm()) { + case AArch64::Bcc: { + A64CC::CondCodes CC = static_cast(Cond[1].getImm()); + CC = A64InvertCondCode(CC); + Cond[1].setImm(CC); + return false; + } + case AArch64::CBZw: + Cond[0].setImm(AArch64::CBNZw); + return false; + case AArch64::CBZx: + Cond[0].setImm(AArch64::CBNZx); + return false; + case AArch64::CBNZw: + Cond[0].setImm(AArch64::CBZw); + return false; + case AArch64::CBNZx: + Cond[0].setImm(AArch64::CBZx); + return false; + case AArch64::TBZwii: + Cond[0].setImm(AArch64::TBNZwii); + return false; + case AArch64::TBZxii: + Cond[0].setImm(AArch64::TBNZxii); + return false; + case AArch64::TBNZwii: + Cond[0].setImm(AArch64::TBZwii); + return false; + case AArch64::TBNZxii: + Cond[0].setImm(AArch64::TBZxii); + return false; + default: + llvm_unreachable("Unknown branch type"); + } +} + + +unsigned +AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const { + if (FBB == 0 && Cond.empty()) { + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB); + return 1; + } else if (FBB == 0) { + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + return 1; + } + + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB); + return 2; +} + +unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (!isCondBranch(I->getOpcode())) + return 1; + + // Remove the branch. 
+ I->eraseFromParent(); + return 2; +} + +bool +AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AArch64::TLSDESC_BLRx: { + MachineInstr *NewMI = + BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL)) + .addOperand(MI.getOperand(1)); + MI.setDesc(get(AArch64::BLRx)); + + llvm::finalizeBundle(MBB, NewMI, *++MBBI); + return true; + } + default: + return false; + } + + return false; +} + +void +AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FrameIdx); + + MachineMemOperand *MMO + = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, + MFI.getObjectSize(FrameIdx), + Align); + + unsigned StoreOp = 0; + if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { + switch(RC->getSize()) { + case 4: StoreOp = AArch64::LS32_STR; break; + case 8: StoreOp = AArch64::LS64_STR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } else { + assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || + RC->hasType(MVT::f128)) + && "Expected integer or floating type for store"); + switch (RC->getSize()) { + case 4: StoreOp = AArch64::LSFP32_STR; break; + case 8: StoreOp = AArch64::LSFP64_STR; break; + case 16: StoreOp = AArch64::LSFP128_STR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); + NewMI.addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx) + .addImm(0) + .addMemOperand(MMO); + +} + +void +AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FrameIdx); + + MachineMemOperand *MMO + = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIdx), + Align); + + unsigned LoadOp = 0; + if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { + switch(RC->getSize()) { + case 4: LoadOp = AArch64::LS32_LDR; break; + case 8: LoadOp = AArch64::LS64_LDR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } else { + assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) + || RC->hasType(MVT::f128)) + && "Expected integer or floating type for store"); + switch (RC->getSize()) { + case 4: LoadOp = AArch64::LSFP32_LDR; break; + case 8: LoadOp = AArch64::LSFP64_LDR; break; + case 16: LoadOp = AArch64::LSFP128_LDR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); + NewMI.addFrameIndex(FrameIdx) + .addImm(0) + .addMemOperand(MMO); +} + +unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const { + unsigned Limit = (1 << 16) - 1; + for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { + for 
(MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (!I->getOperand(i).isFI()) continue; + + // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff + // is the largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) { + Limit = std::min(Limit, 0xfffu); + break; + } + + int AccessScale, MinOffset, MaxOffset; + getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset); + Limit = std::min(Limit, static_cast(MaxOffset)); + + break; // At most one FI per instruction + } + } + } + + return Limit; +} +void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, + int &AccessScale, int &MinOffset, + int &MaxOffset) const { + switch (MI.getOpcode()) { + default: llvm_unreachable("Unkown load/store kind"); + case TargetOpcode::DBG_VALUE: + AccessScale = 1; + MinOffset = INT_MIN; + MaxOffset = INT_MAX; + return; + case AArch64::LS8_LDR: case AArch64::LS8_STR: + case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR: + case AArch64::LDRSBw: + case AArch64::LDRSBx: + AccessScale = 1; + MinOffset = 0; + MaxOffset = 0xfff; + return; + case AArch64::LS16_LDR: case AArch64::LS16_STR: + case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR: + case AArch64::LDRSHw: + case AArch64::LDRSHx: + AccessScale = 2; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LS32_LDR: case AArch64::LS32_STR: + case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR: + case AArch64::LDRSWx: + case AArch64::LDPSWx: + AccessScale = 4; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LS64_LDR: case AArch64::LS64_STR: + case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR: + case AArch64::PRFM: + AccessScale = 8; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR: + AccessScale = 16; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR: + case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR: + AccessScale = 4; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR: + case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR: + AccessScale = 8; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR: + AccessScale = 16; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + } +} + +unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + const MCInstrDesc &MCID = MI.getDesc(); + const MachineBasicBlock &MBB = *MI.getParent(); + const MachineFunction &MF = *MBB.getParent(); + const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); + + if (MCID.getSize()) + return MCID.getSize(); + + if (MI.getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); + + if (MI.isLabel()) + return 0; + + switch (MI.getOpcode()) { + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: + return 0; + case AArch64::CONSTPOOL_ENTRY: + return MI.getOperand(2).getImm(); + case AArch64::TLSDESCCALL: + return 0; + default: + llvm_unreachable("Unknown instruction class"); + } +} + +unsigned 
AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += getInstSizeInBytes(*I); + } + return Size; +} + +bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MFI.getObjectOffset(FrameRegIdx); + llvm_unreachable("Unimplemented rewriteFrameIndex"); +} + +void llvm::emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, MachineInstr::MIFlag MIFlags) { + if (NumBytes == 0 && DstReg == SrcReg) + return; + else if (abs(NumBytes) & ~0xffffff) { + // Generically, we have to materialize the offset into a temporary register + // and subtract it. There are a couple of ways this could be done, for now + // we'll go for a literal-pool load. + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *MCP = MF.getConstantPool(); + const Constant *C + = ConstantInt::get(Type::getInt64Ty(MF.getFunction()->getContext()), + abs(NumBytes)); + unsigned CPI = MCP->getConstantPoolIndex(C, 8); + + // LDR xTMP, .LITPOOL + BuildMI(MBB, MBBI, dl, TII.get(AArch64::LDRx_lit), ScratchReg) + .addConstantPoolIndex(CPI) + .setMIFlag(MIFlags); + + // ADD DST, SRC, xTMP (, lsl #0) + unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; + BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addReg(ScratchReg, RegState::Kill) + .addImm(0) + .setMIFlag(MIFlags); + return; + } + + // Now we know that the adjustment can be done in at most two add/sub + // (immediate) instructions, which is always more efficient than a + // literal-pool load, or even a hypothetical movz/movk/add sequence + + // Decide whether we're doing addition or subtraction + unsigned LowOp, HighOp; + if (NumBytes >= 0) { + LowOp = AArch64::ADDxxi_lsl0_s; + HighOp = AArch64::ADDxxi_lsl12_s; + } else { + LowOp = AArch64::SUBxxi_lsl0_s; + HighOp = AArch64::SUBxxi_lsl12_s; + NumBytes = abs(NumBytes); + } + + // If we're here, at the very least a move needs to be produced, which just + // happens to be materializable by an ADD. + if ((NumBytes & 0xfff) || NumBytes == 0) { + BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(NumBytes & 0xfff) + .setMIFlag(MIFlags); + + // Next update should use the register we've just defined. 
+    SrcReg = DstReg;
+  }
+
+  if (NumBytes & 0xfff000) {
+    BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+      .addReg(SrcReg, RegState::Kill)
+      .addImm(NumBytes >> 12)
+      .setMIFlag(MIFlags);
+  }
+}
+
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                        DebugLoc dl, const TargetInstrInfo &TII,
+                        unsigned ScratchReg, int64_t NumBytes,
+                        MachineInstr::MIFlag MIFlags) {
+  emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
+                NumBytes, MIFlags);
+}
+
+
+namespace {
+  struct LDTLSCleanup : public MachineFunctionPass {
+    static char ID;
+    LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      AArch64MachineFunctionInfo *MFI =
+        MF.getInfo<AArch64MachineFunctionInfo>();
+      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+        // No point folding accesses if there aren't at least two.
+        return false;
+      }
+
+      MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+      return VisitNode(DT->getRootNode(), 0);
+    }
+
+    // Visit the dominator subtree rooted at Node in pre-order.
+    // If TLSBaseAddrReg is non-null, then use that to replace any
+    // TLS_base_addr instructions. Otherwise, create the register
+    // when the first such instruction is seen, and then use it
+    // as we encounter more instructions.
+    bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+      MachineBasicBlock *BB = Node->getBlock();
+      bool Changed = false;
+
+      // Traverse the current block.
+      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+           ++I) {
+        switch (I->getOpcode()) {
+        case AArch64::TLSDESC_BLRx:
+          // Make sure it's a local dynamic access.
+          if (!I->getOperand(1).isSymbol() ||
+              strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+            break;
+
+          if (TLSBaseAddrReg)
+            I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+          else
+            I = SetRegister(I, &TLSBaseAddrReg);
+          Changed = true;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Visit the children of this block in the dominator tree.
+      for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+           I != E; ++I) {
+        Changed |= VisitNode(*I, TLSBaseAddrReg);
+      }
+
+      return Changed;
+    }
+
+    // Replace the TLS_base_addr instruction I with a copy from
+    // TLSBaseAddrReg, returning the new instruction.
+    MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+                                         unsigned TLSBaseAddrReg) {
+      MachineFunction *MF = I->getParent()->getParent();
+      const AArch64TargetMachine *TM =
+        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+      const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+      // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of
+      // the code sequence assumes the address will be.
+      MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+                                   TII->get(TargetOpcode::COPY),
+                                   AArch64::X0)
+        .addReg(TLSBaseAddrReg);
+
+      // Erase the TLS_base_addr instruction.
+      I->eraseFromParent();
+
+      return Copy;
+    }
+
+    // Create a virtual register in *TLSBaseAddrReg, and populate it by
+    // inserting a copy instruction after I. Returns the new instruction.
+    MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+      MachineFunction *MF = I->getParent()->getParent();
+      const AArch64TargetMachine *TM =
+        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+      const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+      // Create a virtual register for the TLS base address.
+      MachineRegisterInfo &RegInfo = MF->getRegInfo();
+      *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+      // Insert a copy from X0 to TLSBaseAddrReg for later.
+      MachineInstr *Next = I->getNextNode();
+      MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+                                   TII->get(TargetOpcode::COPY),
+                                   *TLSBaseAddrReg)
+        .addReg(AArch64::X0);
+
+      return Copy;
+    }
+
+    virtual const char *getPassName() const {
+      return "Local Dynamic TLS Access Clean-up";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass *
+llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
new file mode 100644
index 0000000..8084f78
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -0,0 +1,110 @@
+//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
+#define LLVM_TARGET_AARCH64INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AArch64RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AArch64GenInstrInfo.inc"
+
+namespace llvm {
+
+class AArch64Subtarget;
+
+class AArch64InstrInfo : public AArch64GenInstrInfo {
+  const AArch64RegisterInfo RI;
+  const AArch64Subtarget &Subtarget;
+public:
+  explicit AArch64InstrInfo(const AArch64Subtarget &TM);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+  const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
+
+  MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+                                         uint64_t Offset, const MDNode *MDPtr,
+                                         DebugLoc DL) const;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI,
+                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            unsigned DestReg, int FrameIdx,
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
+
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify = false) const;
+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB,
+                        const SmallVectorImpl<MachineOperand> &Cond,
+                        DebugLoc DL) const;
+  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+  /// Look through the instructions in this function and work out the largest
+  /// the stack frame can be while maintaining the ability to address local
+  /// slots with no complexities.
+ unsigned estimateRSStackLimit(MachineFunction &MF) const; + + /// getAddressConstraints - For loads and stores (and PRFMs) taking an + /// immediate offset, this function determines the constraints required for + /// the immediate. It must satisfy: + /// + MinOffset <= imm <= MaxOffset + /// + imm % OffsetScale == 0 + void getAddressConstraints(const MachineInstr &MI, int &AccessScale, + int &MinOffset, int &MaxOffset) const; + + unsigned getInstSizeInBytes(const MachineInstr &MI) const; + + unsigned getInstBundleLength(const MachineInstr &MI) const; +}; + +bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII); + + +void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, + MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); + +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned ScratchReg, int64_t NumBytes, + MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); + +} + +#endif diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td new file mode 100644 index 0000000..3c15200 --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -0,0 +1,5298 @@ +include "AArch64InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Target-specific ISD nodes and profiles +//===----------------------------------------------------------------------===// + +def SDT_A64ret : SDTypeProfile<0, 0, []>; +def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, + SDNPOptInGlue]>; + +// (ins NZCV, Condition, Dest) +def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; +def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; + +// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) +def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<2, 3>]>; +def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; + +// (outs NZCV), (ins LHS, RHS, Condition) +def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>]>; +def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; + + +// (outs GPR64), (ins) +def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +// A64 compares don't care about the cond really (they set all flags) so a +// simple binary operator is useful. +def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, node:$rhs, cond)>; + + +// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN +// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C +// and V flags can be set differently by this operation. It comes down to +// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are +// then everything is fine. If not then the optimization is wrong. Thus general +// comparisons are only valid if op2 != 0. + +// So, finally, the only LLVM-native comparisons that don't mention C and V are +// SETEQ and SETNE. They're the only ones we can safely use CMN for in the +// absence of information about op2. 
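+// For example, with op2 == 0 the rewrite is observably wrong for any
+// condition that reads C: "cmp x0, #0" computes x0 - 0 and always sets
+// C = 1 (no borrow), while the rewritten "cmn x0, #0" computes x0 + 0 and
+// always sets C = 0, so a following "b.hs" would branch after one form and
+// never after the other. Z is identical in both forms (x0 - 0 == 0 exactly
+// when x0 + 0 == 0), which is why only the equality conditions are matched: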
+def equality_cond : PatLeaf<(cond), [{ + return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; +}]>; + +def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; + +// There are two layers of indirection here, driven by the following +// considerations. +// + TableGen does not know CodeModel or Reloc so that decision should be +// made for a variable/address at ISelLowering. +// + The output of ISelLowering should be selectable (hence the Wrapper, +// rather than a bare target opcode) +def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; + + +def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; +def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, + [SDNPHasChain]>; + + +// (A64BFI LHS, RHS, LSB, Width) +def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>, + SDTCisVT<4, i64>]>; + +def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; + +// (A64EXTR HiReg, LoReg, LSB) +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>]>; +def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +// (A64[SU]BFX Field, ImmR, ImmS). +// +// Note that ImmR and ImmS are already encoded for the actual instructions. The +// more natural LSB and Width mix together to form ImmR and ImmS, something +// which TableGen can't handle. +def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; +def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; + +def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; + +//===----------------------------------------------------------------------===// +// Call sequence pseudo-instructions +//===----------------------------------------------------------------------===// + + +def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// The TLSDESCCALL node is a variant call which goes to an indirectly calculated +// destination but needs a relocation against a fixed symbol. As such it has two +// certain operands: the callee and the relocated variable. +// +// The TLS ABI only allows it to be selected to a BLR instructin (with +// appropriate relocation). +def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; + + +def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; + +def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>; +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + + + +// These pseudo-instructions have special semantics by virtue of being passed to +// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by +// LowerCall to (in our case) tell the back-end about stack adjustments for +// arguments passed on the stack. 
Here we select those markers to +// pseudo-instructions which explicitly set the stack, and finally in the +// RegisterInfo we convert them to a true stack adjustment. +let Defs = [XSP], Uses = [XSP] in +{ + def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), + [(AArch64callseq_start timm:$amt)]>; + + def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; +} + +//===----------------------------------------------------------------------===// +// Atomic operation pseudo-instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1, Defs = [NZCV] in { +multiclass AtomicSizes +{ + def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast(opname # "_8") GPR64:$ptr, GPR32:$incr))]>; + def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast(opname # "_16") GPR64:$ptr, GPR32:$incr))]>; + def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast(opname # "_32") GPR64:$ptr, GPR32:$incr))]>; + def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), + [(set GPR64:$dst, (!cast(opname # "_64") GPR64:$ptr, GPR64:$incr))]>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; +defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; +defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; +defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; +defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; +defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; +defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; +defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; +defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; +defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; +defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; + +let usesCustomInserter = 1, Defs = [NZCV] in { +def ATOMIC_CMP_SWAP_I8 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_8 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I16 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_16 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I32 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_32 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I64 + : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), + [(set GPR64:$dst, + (atomic_cmp_swap_64 GPR64:$ptr, GPR64:$old, GPR64:$new))]>; +} + +//===----------------------------------------------------------------------===// +// Add-subtract (extended register) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP + +// The RHS of these operations is conceptually a sign/zero-extended +// register, optionally shifted left by 1-4. The extension can be a +// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but +// must be specified with one exception: + +// If one of the registers is sp/wsp then LSL is an alias for UXTW in +// 32-bit instructions and UXTX in 64-bit versions, the shift amount +// is not optional in that case (but can explicitly be 0), and the +// entire suffix can be skipped (e.g. "add sp, x3, x2"). 
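+// A few concrete forms, for reference:
+//   add x0, x1, w2, sxth      ; x0 = x1 + SignExtend16(w2)
+//   add x0, x1, w2, uxtw #2   ; x0 = x1 + (ZeroExtend32(w2) << 2)
+//   add sp, x3, x2            ; lsl #0 (i.e. uxtx) implied because sp is used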
+ +multiclass extend_operands +{ + def _asmoperand : AsmOperandClass + { + let Name = PREFIX; + let RenderMethod = "addRegExtendOperands"; + let PredicateMethod = "isRegExtend"; + } + + def _operand : Operand, ImmLeaf= 0 && Imm <= 4; }]> + { + let PrintMethod = "printRegExtendOperand"; + let DecoderMethod = "DecodeRegExtendOperand"; + let ParserMatchClass = !cast(PREFIX # "_asmoperand"); + } +} + +defm UXTB : extend_operands<"UXTB">; +defm UXTH : extend_operands<"UXTH">; +defm UXTW : extend_operands<"UXTW">; +defm UXTX : extend_operands<"UXTX">; +defm SXTB : extend_operands<"SXTB">; +defm SXTH : extend_operands<"SXTH">; +defm SXTW : extend_operands<"SXTW">; +defm SXTX : extend_operands<"SXTX">; + +def LSL_extasmoperand : AsmOperandClass +{ + let Name = "RegExtendLSL"; + let RenderMethod = "addRegExtendOperands"; +} + +def LSL_extoperand : Operand +{ + let ParserMatchClass = LSL_extasmoperand; +} + + +// The patterns for various sign-extensions are a little ugly and +// non-uniform because everything has already been promoted to the +// legal i64 and i32 types. We'll wrap the various variants up in a +// class for use later. +class extend_types +{ + dag uxtb; dag uxth; dag uxtw; dag uxtx; + dag sxtb; dag sxth; dag sxtw; dag sxtx; +} + +def extends_to_i64 : extend_types +{ + let uxtb = (and (anyext GPR32:$Rm), 255); + let uxth = (and (anyext GPR32:$Rm), 65535); + let uxtw = (zext GPR32:$Rm); + let uxtx = (i64 GPR64:$Rm); + + let sxtb = (sext_inreg (anyext GPR32:$Rm), i8); + let sxth = (sext_inreg (anyext GPR32:$Rm), i16); + let sxtw = (sext GPR32:$Rm); + let sxtx = (i64 GPR64:$Rm); +} + + +def extends_to_i32 : extend_types +{ + let uxtb = (and GPR32:$Rm, 255); + let uxth = (and GPR32:$Rm, 65535); + let uxtw = (i32 GPR32:$Rm); + let uxtx = (i32 GPR32:$Rm); + + let sxtb = (sext_inreg GPR32:$Rm, i8); + let sxth = (sext_inreg GPR32:$Rm, i16); + let sxtw = (i32 GPR32:$Rm); + let sxtx = (i32 GPR32:$Rm); +} + +// Now, six of the extensions supported are easy and uniform: if the source size +// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate +// those instructions in one block. + +// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: +// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would +// be impossible. +// + Patterns are very different as well. +// + Passing different registers would be ugly (more fields in extend_types +// would probably be the best option). 
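+// The net effect, roughly: a 64-bit instantiation such as ADDxx below is
+// assembled from addsub_exts (the six W-register source forms, w_uxtb
+// through w_sxtw) plus addsub_xxtx (the two X-register forms x_uxtx and
+// x_sxtx), giving the eight extensions the architecture defines.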
+multiclass addsub_exts +{ + def w_uxtb : A64I_addsubext; + def w_uxth : A64I_addsubext; + def w_uxtw : A64I_addsubext; + + def w_sxtb : A64I_addsubext; + def w_sxth : A64I_addsubext; + def w_sxtw : A64I_addsubext; +} + +// These two could be merge in with the above, but their patterns aren't really +// necessary and the naming-scheme would necessarily break: +multiclass addsub_xxtx +{ + def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPR64xsp:$Rn, (shl GPR64:$Rm, UXTX_operand:$Imm3))], + NoItinerary>; + + def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: same as uxtx */], + NoItinerary>; +} + +multiclass addsub_wxtx +{ + def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No pattern: probably same as uxtw */], + NoItinerary>; + + def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: probably same as uxtw */], + NoItinerary>; +} + +class SetRD + : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; +class SetNZCV + : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; + +defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD, + (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD, + (outs GPR64xsp:$Rd)>; +defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD, + (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", + (outs GPR32wsp:$Rd)>; +defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD, + (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD, + (outs GPR64xsp:$Rd)>; +defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD, + (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", + (outs GPR32wsp:$Rd)>; + +let Defs = [NZCV] in { +defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD, + (outs GPR64:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD, + (outs GPR64:$Rd)>; +defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD, + (outs GPR32:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", + (outs GPR32:$Rd)>; +defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD, + (outs GPR64:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD, + (outs GPR64:$Rd)>; +defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD, + (outs GPR32:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", + (outs GPR32:$Rd)>; + + +let Rd = 0b11111, isCompare = 1 in { +defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV, + (outs), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV, (outs)>; +defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV, + (outs), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>; +defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV, + (outs), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV, (outs)>; +defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV, + (outs), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; +} +} + 
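+// Note the effect of "let Rd = 0b11111" above: CMP and CMN are simply the
+// flag-setting SUBS/ADDS with the zero register as destination, so e.g.
+// "cmp x1, w2, uxtw" shares its encoding with "subs xzr, x1, w2, uxtw".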
+// Now patterns for the operation without a shift being needed. No patterns are +// created for uxtx/sxtx since they're non-uniform and it's expected that +// add/sub (shifted register) will handle those cases anyway. +multiclass addsubext_noshift_patterns +{ + def : Pat<(nodeop GPRsp:$Rn, exts.uxtb), + (!cast(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.uxth), + (!cast(prefix # "w_uxth") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.uxtw), + (!cast(prefix # "w_uxtw") GPRsp:$Rn, GPR32:$Rm, 0)>; + + def : Pat<(nodeop GPRsp:$Rn, exts.sxtb), + (!cast(prefix # "w_sxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.sxth), + (!cast(prefix # "w_sxth") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.sxtw), + (!cast(prefix # "w_sxtw") GPRsp:$Rn, GPR32:$Rm, 0)>; +} + +defm : addsubext_noshift_patterns<"ADDxx", add, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"ADDww", add, GPR32wsp, extends_to_i32>; +defm : addsubext_noshift_patterns<"SUBxx", sub, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"SUBww", sub, GPR32wsp, extends_to_i32>; + +defm : addsubext_noshift_patterns<"CMNx", A64cmn, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMNw", A64cmn, GPR32wsp, extends_to_i32>; +defm : addsubext_noshift_patterns<"CMPx", A64cmp, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>; + +// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is +// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the +// operation. Also permitted in this case is complete omission of the argument, +// which implies "lsl #0". +multiclass lsl_aliases +{ + def : InstAlias; + + def : InstAlias; + +} + +defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>; +defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>; +defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>; +defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>; +defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>; +defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>; +defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>; +defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>; + +// Rd cannot be sp for flag-setting variants so only half of the aliases are +// needed. +defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>; +defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>; +defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>; +defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; + +// CMP unfortunately has to be different because the instruction doesn't have a +// dest register. +multiclass cmp_lsl_aliases +{ + def : InstAlias; + + def : InstAlias; +} + +defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>; +defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>; +defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>; +defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>; + +//===----------------------------------------------------------------------===// +// Add-subtract (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV + +// These instructions accept a 12-bit unsigned immediate, optionally shifted +// left by 12 bits. Official assembly format specifies a 12 bit immediate with +// one of "", "LSL #0", "LSL #12" supplementary operands. 
+ +// There are surprisingly few ways to make this work with TableGen, so this +// implementation has separate instructions for the "LSL #0" and "LSL #12" +// variants. + +// If the MCInst retained a single combined immediate (which could be 0x123000, +// for example) then both components (imm & shift) would have to be delegated to +// a single assembly operand. This would entail a separate operand parser +// (because the LSL would have to live in the same AArch64Operand as the +// immediate to be accessible); assembly parsing is rather complex and +// error-prone C++ code. +// +// By splitting the immediate, we can delegate handling this optional operand to +// an InstAlias. Supporting functions to generate the correct MCInst are still +// required, but these are essentially trivial and parsing can remain generic. +// +// Rejected plans with rationale: +// ------------------------------ +// +// In an ideal world you'de have two first class immediate operands (in +// InOperandList, specifying imm12 and shift). Unfortunately this is not +// selectable by any means I could discover. +// +// An Instruction with two MCOperands hidden behind a single entry in +// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional, +// but required more C++ code to handle encoding/decoding. Parsing (the intended +// main beneficiary) ended up equally complex because of the optional nature of +// "LSL #0". +// +// Attempting to circumvent the need for a custom OperandParser above by giving +// InstAliases without the "lsl #0" failed. add/sub could be accommodated but +// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands +// should be parsed: there was no way to accommodate an "lsl #12". + +let ParserMethod = "ParseImmWithLSLOperand", + RenderMethod = "addImmWithLSLOperands" in +{ + // Derived PredicateMethod fields are different for each + def addsubimm_lsl0_asmoperand : AsmOperandClass + { + let Name = "AddSubImmLSL0"; + } + + def addsubimm_lsl12_asmoperand : AsmOperandClass + { + let Name = "AddSubImmLSL12"; + } +} + +def shr_12_XFORM : SDNodeXFormgetTargetConstant(N->getSExtValue() >> 12, MVT::i32); +}]>; + +def shr_12_neg_XFORM : SDNodeXFormgetTargetConstant((-N->getSExtValue()) >> 12, MVT::i32); +}]>; + +def neg_XFORM : SDNodeXFormgetTargetConstant(-N->getSExtValue(), MVT::i32); +}]>; + + +multiclass addsub_imm_operands +{ + let PrintMethod = "printAddSubImmLSL0Operand", + EncoderMethod = "getAddSubImmOpValue", + ParserMatchClass = addsubimm_lsl0_asmoperand in + { + def _posimm_lsl0 : Operand, + ImmLeaf= 0 && (Imm & ~0xfff) == 0; }]>; + def _negimm_lsl0 : Operand, + ImmLeaf; + } + + let PrintMethod = "printAddSubImmLSL12Operand", + EncoderMethod = "getAddSubImmOpValue", + ParserMatchClass = addsubimm_lsl12_asmoperand in + { + def _posimm_lsl12 : Operand, + ImmLeaf= 0 && (Imm & ~0xfff000) == 0; }], + shr_12_XFORM>; + + def _negimm_lsl12 : Operand, + ImmLeaf; + } +} + +// The add operands don't need any transformation +defm addsubimm_operand_i32 : addsub_imm_operands; +defm addsubimm_operand_i64 : addsub_imm_operands; + +multiclass addsubimm_varieties shift, + string asmop, string cmpasmop, + Operand imm_operand, Operand cmp_imm_operand, + RegisterClass GPR, RegisterClass GPRsp, + AArch64Reg ZR> +{ + // All registers for non-S variants allow SP + def _s : A64I_addsubimm; + + + // S variants can read SP but would write to ZR + def _S : A64I_addsubimm { + let Defs = [NZCV]; + } + + // Note that the pattern here for ADDS is subtle. 
Canonically CMP + // a, b becomes SUBS a, b. If b < 0 then this is equivalent to + // ADDS a, (-b). This is not true in general. + def _cmp : A64I_addsubimm + { + let Rd = 0b11111; + let Defs = [NZCV]; + let isCompare = 1; + } +} + + +multiclass addsubimm_shifts +{ + defm _lsl0 : addsubimm_varieties(operand # "_lsl0"), + !cast(cmpoperand # "_lsl0"), + GPR, GPRsp, ZR>; + + defm _lsl12 : addsubimm_varieties(operand # "_lsl12"), + !cast(cmpoperand # "_lsl12"), + GPR, GPRsp, ZR>; +} + +defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn", + "addsubimm_operand_i32_posimm", + "addsubimm_operand_i32_negimm", + GPR32, GPR32wsp, WZR>; +defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn", + "addsubimm_operand_i64_posimm", + "addsubimm_operand_i64_negimm", + GPR64, GPR64xsp, XZR>; +defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp", + "addsubimm_operand_i32_negimm", + "addsubimm_operand_i32_posimm", + GPR32, GPR32wsp, WZR>; +defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp", + "addsubimm_operand_i64_negimm", + "addsubimm_operand_i64_posimm", + GPR64, GPR64xsp, XZR>; + +multiclass MOVsp +{ + def _fromsp : InstAlias<"mov $Rd, $Rn", + (addop GPRsp:$Rd, SP:$Rn, 0), + 0b1>; + + def _tosp : InstAlias<"mov $Rd, $Rn", + (addop SP:$Rd, GPRsp:$Rn, 0), + 0b1>; +} + +// Recall Rxsp is a RegisterClass containing *just* xsp. +defm MOVxx : MOVsp; +defm MOVww : MOVsp; + +//===----------------------------------------------------------------------===// +// Add-subtract (shifted register) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS + +//===------------------------------- +// 1. The "shifed register" operands. Shared with logical insts. +//===------------------------------- + +multiclass shift_operands +{ + def _asmoperand_i32 : AsmOperandClass + { + let Name = "Shift" # form # "i32"; + let RenderMethod = "addShiftOperands"; + let PredicateMethod + = "isShift"; + } + + // Note that the operand type is intentionally i64 because the DAGCombiner + // puts these into a canonical form. + def _i32 : Operand, ImmLeaf= 0 && Imm <= 31; }]> + { + let ParserMatchClass + = !cast(prefix # "_asmoperand_i32"); + let PrintMethod = "printShiftOperand"; + let DecoderMethod = "Decode32BitShiftOperand"; + } + + def _asmoperand_i64 : AsmOperandClass + { + let Name = "Shift" # form # "i64"; + let RenderMethod = "addShiftOperands"; + let PredicateMethod + = "isShift"; + } + + def _i64 : Operand, ImmLeaf= 0 && Imm <= 63; }]> + { + let ParserMatchClass + = !cast(prefix # "_asmoperand_i64"); + let PrintMethod = "printShiftOperand"; + } +} + +defm lsl_operand : shift_operands<"lsl_operand", "LSL">; +defm lsr_operand : shift_operands<"lsr_operand", "LSR">; +defm asr_operand : shift_operands<"asr_operand", "ASR">; + +// Not used for add/sub, but defined here for completeness. The "logical +// (shifted register)" instructions *do* have an ROR variant. +defm ror_operand : shift_operands<"ror_operand", "ROR">; + +//===------------------------------- +// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions. +//===------------------------------- + +// N.b. the commutable parameter is just !N. It will be first against the wall +// when the revolution comes. 
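+// For example "add w0, w1, w2, lsl #3" computes w0 = w1 + (w2 << 3); the
+// plain "add w0, w1, w2" form is the Imm6 == 0 case, covered by the
+// _noshift alias and the final Pat in each instantiation below.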
+multiclass addsub_shifts defs> +{ + let isCommutable = commutable, Defs = defs in { + def _lsl : A64I_addsubshift("lsl_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm, + !cast("lsl_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_addsubshift("lsr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm, + !cast("lsr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_addsubshift("asr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm, + !cast("asr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias(prefix # "_lsl") GPR:$Rd, GPR:$Rn, + GPR:$Rm, 0)>; + + def : Pat<(opfrag GPR:$Rn, GPR:$Rm), + (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +multiclass addsub_sizes defs> +{ + defm xxx : addsub_shifts; + defm www : addsub_shifts; +} + + +defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>; +defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>; + +defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>; +defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>; + +//===------------------------------- +// 1. The NEG/NEGS aliases +//===------------------------------- + +multiclass neg_alias +{ + def : InstAlias<"neg $Rd, $Rm, $Imm6", + (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; + + def : Pat<(sub 0, (shiftop GPR:$Rm, shift_operand:$Imm6)), + (INST ZR, GPR:$Rm, shift_operand:$Imm6)>; +} + +defm : neg_alias; +defm : neg_alias; +defm : neg_alias; +def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; +def : Pat<(sub 0, GPR32:$Rm), (SUBwww_lsl WZR, GPR32:$Rm, 0)>; + +defm : neg_alias; +defm : neg_alias; +defm : neg_alias; +def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; +def : Pat<(sub 0, GPR64:$Rm), (SUBxxx_lsl XZR, GPR64:$Rm, 0)>; + +// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to +// be involved. +class negs_alias + : InstAlias<"negs $Rd, $Rm, $Imm6", + (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; + +def : negs_alias; +def : negs_alias; +def : negs_alias; +def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; + +def : negs_alias; +def : negs_alias; +def : negs_alias; +def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; + +//===------------------------------- +// 1. 
The CMP/CMN aliases +//===------------------------------- + +multiclass cmp_shifts +{ + let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in { + def _lsl : A64I_addsubshift("lsl_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), + [(set NZCV, (opfrag GPR:$Rn, (shl GPR:$Rm, + !cast("lsl_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_addsubshift("lsr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), + [(set NZCV, (opfrag GPR:$Rn, (srl GPR:$Rm, + !cast("lsr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_addsubshift("asr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), + [(set NZCV, (opfrag GPR:$Rn, (sra GPR:$Rm, + !cast("asr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(opfrag GPR:$Rn, GPR:$Rm), + (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, "i32", GPR32>; +defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, "i64", GPR64>; + +defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, "i32", GPR32>; +defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, "i64", GPR64>; + +//===----------------------------------------------------------------------===// +// Add-subtract (with carry) instructions +//===----------------------------------------------------------------------===// +// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS + +multiclass A64I_addsubcarrySizes +{ + let Uses = [NZCV] in + { + def www : A64I_addsubcarry<0b0, op, s, 0b000000, + (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), + !strconcat(asmop, "\t$Rd, $Rn, $Rm"), + [], NoItinerary>; + + def xxx : A64I_addsubcarry<0b1, op, s, 0b000000, + (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), + !strconcat(asmop, "\t$Rd, $Rn, $Rm"), + [], NoItinerary>; + } +} + +let isCommutable = 1 in +{ + defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">; +} + +defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">; + +let Defs = [NZCV] in +{ + let isCommutable = 1 in + { + defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">; + } + + defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">; +} + +def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>; +def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>; +def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>; +def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>; + +// Note that adde and sube can form a chain longer than two (e.g. for 256-bit +// addition). So the flag-setting instructions are appropriate. +def : Pat<(adde GPR32:$Rn, GPR32:$Rm), (ADCSwww GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(adde GPR64:$Rn, GPR64:$Rm), (ADCSxxx GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(sube GPR32:$Rn, GPR32:$Rm), (SBCSwww GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>; + +//===----------------------------------------------------------------------===// +// Bitfield +//===----------------------------------------------------------------------===// +// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL, +// UBFIZ, UBFX + +// Because of the rather complicated nearly-overlapping aliases, the decoding of +// this range of instructions is handled manually. The architectural +// instructions are BFM, SBFM and UBFM but a disassembler should never produce +// these. 
+// +// In the end, the best option was to use BFM instructions for decoding under +// almost all circumstances, but to create aliasing *Instructions* for each of +// the canonical forms and specify a completely custom decoder which would +// substitute the correct MCInst as needed. +// +// This also simplifies instruction selection, parsing etc because the MCInsts +// have a shape that's closer to their use in code. + +//===------------------------------- +// 1. The architectural BFM instructions +//===------------------------------- + +def uimm5_asmoperand : AsmOperandClass +{ + let Name = "UImm5"; + let PredicateMethod = "isUImm<5>"; + let RenderMethod = "addImmOperands"; +} + +def uimm6_asmoperand : AsmOperandClass +{ + let Name = "UImm6"; + let PredicateMethod = "isUImm<6>"; + let RenderMethod = "addImmOperands"; +} + +def bitfield32_imm : Operand, + ImmLeaf= 0 && Imm < 32; }]> +{ + let ParserMatchClass = uimm5_asmoperand; + + let DecoderMethod = "DecodeBitfield32ImmOperand"; +} + + +def bitfield64_imm : Operand, + ImmLeaf= 0 && Imm < 64; }]> +{ + let ParserMatchClass = uimm6_asmoperand; + + // Default decoder works in 64-bit case: the 6-bit field can take any value. +} + +multiclass A64I_bitfieldSizes opc, string asmop> +{ + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> + { + let DecoderMethod = "DecodeBitfieldInstruction"; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> + { + let DecoderMethod = "DecodeBitfieldInstruction"; + } +} + +defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">; +defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; + +// BFM instructions modify the destination register rather than defining it +// completely. +def BFMwwii : + A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> +{ + let DecoderMethod = "DecodeBitfieldInstruction"; + let Constraints = "$src = $Rd"; +} + +def BFMxxii : + A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> +{ + let DecoderMethod = "DecodeBitfieldInstruction"; + let Constraints = "$src = $Rd"; +} + + +//===------------------------------- +// 2. Extend aliases to 64-bit dest +//===------------------------------- + +// Unfortunately the extensions that end up as 64-bits cannot be handled by an +// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs +// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is +// not capable of such a map as far as I'm aware + +// Note that these instructions are strictly more specific than the +// BFM ones (in ImmR) so they can handle their own decoding. 
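+// For example "sxtb x0, w0" is precisely "sbfm x0, x0, #0, #7": ImmR == 0
+// and ImmS == 7 select bits 7:0 of the source and sign-extend them, which
+// is why each A64I_bf_ext below hard-wires ImmR to zero and sets ImmS to
+// the top bit of the field being extended.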
+class A64I_bf_ext opc, RegisterClass GPRDest, string asmop, + bits<6> imms, dag pattern> + : A64I_bitfield +{ + let ImmR = 0b000000; + let ImmS = imms; +} + +// Signed extensions +def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtb", 7, + (sext_inreg (anyext GPR32:$Rn), i8)>; +def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, "sxtb", 7, + (sext_inreg GPR32:$Rn, i8)>; +def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxth", 15, + (sext_inreg (anyext GPR32:$Rn), i16)>; +def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, "sxth", 15, + (sext_inreg GPR32:$Rn, i16)>; +def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtw", 31, (sext GPR32:$Rn)>; + +// Unsigned extensions +def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, "uxtb", 7, + (and GPR32:$Rn, 255)>; +def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15, + (and GPR32:$Rn, 65535)>; + +// The 64-bit unsigned variants are not strictly architectural but recommended +// for consistency. +let isAsmParserOnly = 1 in +{ + def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7, + (and (anyext GPR32:$Rn), 255)>; + def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15, + (and (anyext GPR32:$Rn), 65535)>; +} + +// Extra patterns for when the source register is actually 64-bits +// too. There's no architectural difference here, it's just LLVM +// shinanigans. There's no need for equivalent zero-extension patterns +// because they'll already be caught by logical (immediate) matching. +def : Pat<(sext_inreg GPR64:$Rn, i8), + (SXTBxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>; +def : Pat<(sext_inreg GPR64:$Rn, i16), + (SXTHxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>; +def : Pat<(sext_inreg GPR64:$Rn, i32), + (SXTWxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>; + + +//===------------------------------- +// 3. Aliases for ASR and LSR (the simple shifts) +//===------------------------------- + +// These also handle their own decoding because ImmS being set makes +// them take precedence over BFM. +multiclass A64I_shift opc, string asmop, SDNode opnode> +{ + def wwi : A64I_bitfield<0b0, opc, 0b0, + (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), + [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))], + NoItinerary> + { + let ImmS = 31; + } + + def xxi : A64I_bitfield<0b1, opc, 0b1, + (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), + [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))], + NoItinerary> + { + let ImmS = 63; + } + +} + +defm ASR : A64I_shift<0b00, "asr", sra>; +defm LSR : A64I_shift<0b10, "lsr", srl>; + +//===------------------------------- +// 4. Aliases for LSL +//===------------------------------- + +// Unfortunately LSL and subsequent aliases are much more complicated. We need +// to be able to say certain output instruction fields depend in a complex +// manner on combinations of input assembly fields). +// +// MIOperandInfo *might* have been able to do it, but at the cost of +// significantly more C++ code. + +// N.b. contrary to usual practice these operands store the shift rather than +// the machine bits in an MCInst. The complexity overhead of consistency +// outweighed the benefits in this case (custom asmparser, printer and selection +// vs custom encoder). 
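+// The underlying identity for the 64-bit case is
+//   lsl x0, x1, #shift  ==  ubfm x0, x1, #((64 - shift) % 64), #(63 - shift)
+// so e.g. "lsl x0, x1, #3" is "ubfm x0, x1, #61, #60". The encoder methods
+// named below are what turn the stored shift into that ImmR/ImmS pair.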
+def bitfield32_lsl_imm : Operand, + ImmLeaf= 0 && Imm <= 31; }]> +{ + let ParserMatchClass = uimm5_asmoperand; + let EncoderMethod = "getBitfield32LSLOpValue"; +} + +def bitfield64_lsl_imm : Operand, + ImmLeaf= 0 && Imm <= 63; }]> +{ + let ParserMatchClass = uimm6_asmoperand; + let EncoderMethod = "getBitfield64LSLOpValue"; +} + +class A64I_bitfield_lsl + : A64I_bitfield +{ + bits<12> FullImm; + let ImmR = FullImm{5-0}; + let ImmS = FullImm{11-6}; + + // No disassembler allowed because it would overlap with BFM which does the + // actual work. + let isAsmParserOnly = 1; +} + +def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, bitfield32_lsl_imm>; +def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>; + +//===------------------------------- +// 5. Aliases for bitfield extract instructions +//===------------------------------- + +def bfx32_width_asmoperand : AsmOperandClass +{ + let Name = "BFX32Width"; + let PredicateMethod = "isBitfieldWidth<32>"; + let RenderMethod = "addBFXWidthOperands"; +} + +def bfx32_width : Operand, ImmLeaf +{ + let PrintMethod = "printBFXWidthOperand"; + let ParserMatchClass = bfx32_width_asmoperand; +} + +def bfx64_width_asmoperand : AsmOperandClass +{ + let Name = "BFX64Width"; + let PredicateMethod = "isBitfieldWidth<64>"; + let RenderMethod = "addBFXWidthOperands"; +} + +def bfx64_width : Operand +{ + let PrintMethod = "printBFXWidthOperand"; + let ParserMatchClass = bfx64_width_asmoperand; +} + + +multiclass A64I_bitfield_extract opc, string asmop, SDNode op> +{ + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))], + NoItinerary> + { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))], + NoItinerary> + { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } +} + +defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>; +defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; + +// Again, variants based on BFM modify Rd so need it as an input too. +def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> +{ + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> +{ + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +// SBFX instructions can do a 1-instruction sign-extension of boolean values. +def : Pat<(sext_inreg GPR64:$Rn, i1), (SBFXxxii GPR64:$Rn, 0, 0)>; +def : Pat<(sext_inreg GPR32:$Rn, i1), (SBFXwwii GPR32:$Rn, 0, 0)>; +def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)), + (SBFXxxii (SUBREG_TO_REG (i64 0), GPR32:$Rn, sub_32), 0, 0)>; + +// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could +// use either 64-bit or 32-bit variant, but 32-bit might be more efficient. 
+def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31), sub_32)>; + +//===------------------------------- +// 6. Aliases for bitfield insert instructions +//===------------------------------- + +def bfi32_lsb_asmoperand : AsmOperandClass +{ + let Name = "BFI32LSB"; + let PredicateMethod = "isUImm<5>"; + let RenderMethod = "addBFILSBOperands<32>"; +} + +def bfi32_lsb : Operand, ImmLeaf= 0 && Imm <= 31; }]> +{ + let PrintMethod = "printBFILSBOperand<32>"; + let ParserMatchClass = bfi32_lsb_asmoperand; +} + +def bfi64_lsb_asmoperand : AsmOperandClass +{ + let Name = "BFI64LSB"; + let PredicateMethod = "isUImm<6>"; + let RenderMethod = "addBFILSBOperands<64>"; +} + +def bfi64_lsb : Operand, ImmLeaf= 0 && Imm <= 63; }]> +{ + let PrintMethod = "printBFILSBOperand<64>"; + let ParserMatchClass = bfi64_lsb_asmoperand; +} + +// Width verification is performed during conversion so width operand can be +// shared between 32/64-bit cases. Still needed for the print method though +// because ImmR encodes "width - 1". +def bfi32_width_asmoperand : AsmOperandClass +{ + let Name = "BFI32Width"; + let PredicateMethod = "isBitfieldWidth<32>"; + let RenderMethod = "addBFIWidthOperands"; +} + +def bfi32_width : Operand, + ImmLeaf= 1 && Imm <= 32; }]> +{ + let PrintMethod = "printBFIWidthOperand"; + let ParserMatchClass = bfi32_width_asmoperand; +} + +def bfi64_width_asmoperand : AsmOperandClass +{ + let Name = "BFI64Width"; + let PredicateMethod = "isBitfieldWidth<64>"; + let RenderMethod = "addBFIWidthOperands"; +} + +def bfi64_width : Operand, + ImmLeaf= 1 && Imm <= 64; }]> +{ + let PrintMethod = "printBFIWidthOperand"; + let ParserMatchClass = bfi64_width_asmoperand; +} + +multiclass A64I_bitfield_insert opc, string asmop> +{ + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> + { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> + { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + +} + +defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; +defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; + + +def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> +{ + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> +{ + // As above, no disassembler allowed. 
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+class label_asmoperand<int width, int scale> : AsmOperandClass
+{
+ let Name = "Label" # width # "_" # scale;
+ let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+ let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
+def bcc_target : Operand<OtherVT>
+{
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let ParserMatchClass = label_wid19_scal4_asmoperand;
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let EncoderMethod = "getLabelOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+
+multiclass cmpbr_sizes
+{
+ let isBranch = 1, isTerminator = 1 in {
+ def x : A64I_cmpbr<0b1, op,
+ (outs),
+ (ins GPR64:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp GPR64:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+
+ def w : A64I_cmpbr<0b0, op,
+ (outs),
+ (ins GPR32:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp GPR32:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+ }
+}
+
+defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf >;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass
+{
+ let Name = "CondCode";
+}
+
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 15;
+}]>
+{
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_asmoperand;
+}
+
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+ (ins cond_code:$Cond, bcc_target:$Label),
+ "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+ NoItinerary>
+{
+ let Uses = [NZCV];
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass
+{
+ let Name = "UImm4";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+}
+
+def uimm4 : Operand
+{
+ let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand
+{
+ let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The others get parsed implicitly as part
+// of the mnemonic handling.
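+// As a rough illustration of the semantics these operands feed into: in
+//     ccmp x0, #3, #8, ne
+// NZCV becomes the flags of "cmp x0, #3" if "ne" currently holds, and the
+// literal flag nibble #8 (i.e. N=1, Z=C=V=0) otherwise.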
+def cond_code_op_asmoperand : AsmOperandClass +{ + let Name = "CondCodeOp"; + let RenderMethod = "addCondCodeOperands"; + let PredicateMethod = "isCondCode"; + let ParserMethod = "ParseCondCodeOperand"; +} + +def cond_code_op : Operand +{ + let PrintMethod = "printCondCodeOperand"; + let ParserMatchClass = cond_code_op_asmoperand; +} + +class A64I_condcmpimmImpl + : A64I_condcmpimm +{ + let Defs = [NZCV]; +} + +def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">; +def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">; +def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">; +def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">; + +//===----------------------------------------------------------------------===// +// Conditional compare (register) instructions +//===----------------------------------------------------------------------===// +// Contains: CCMN, CCMP + +class A64I_condcmpregImpl + : A64I_condcmpreg +{ + let Defs = [NZCV]; +} + +def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">; +def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">; +def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">; +def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">; + +//===----------------------------------------------------------------------===// +// Conditional select instructions +//===----------------------------------------------------------------------===// +// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG + +// Condition code which is encoded as the inversion (semantically rather than +// bitwise) in the instruction. +def inv_cond_code_op_asmoperand : AsmOperandClass +{ + let Name = "InvCondCodeOp"; + let RenderMethod = "addInvCondCodeOperands"; + let PredicateMethod = "isCondCode"; + let ParserMethod = "ParseCondCodeOperand"; +} + +def inv_cond_code_op : Operand +{ + let ParserMatchClass = inv_cond_code_op_asmoperand; +} + +// Having a separate operand for the selectable use-case is debatable, but gives +// consistency with cond_code. 
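+// For example, the alias "cset w0, eq" defined below is really
+//     csinc w0, wzr, wzr, ne
+// so the condition the user writes is the semantic inversion of the one that
+// gets encoded; inv_cond_code_op performs exactly that inversion.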
+def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32); +}]>; + +def inv_cond_code + : ImmLeaf= 0 && Imm <= 15; }], inv_cond_XFORM>; + + +multiclass A64I_condselSizes op2, string asmop, + SDPatternOperator select> +{ + let Uses = [NZCV] in + { + def wwwc : A64I_condsel<0b0, op, 0b0, op2, + (outs GPR32:$Rd), + (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), + [(set GPR32:$Rd, (select GPR32:$Rn, GPR32:$Rm))], + NoItinerary>; + + + def xxxc : A64I_condsel<0b1, op, 0b0, op2, + (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), + [(set GPR64:$Rd, (select GPR64:$Rn, GPR64:$Rm))], + NoItinerary>; + } +} + +def simple_select + : PatFrag<(ops node:$lhs, node:$rhs), + (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>; + +class complex_select + : PatFrag<(ops node:$lhs, node:$rhs), + (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>; + + +defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>; +defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc", + complex_select>>; +defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select>; +defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select>; + +// Now the instruction aliases, which fit nicely into LLVM's model: + +def : InstAlias<"cset $Rd, $Cond", + (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"cset $Rd, $Cond", + (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"csetm $Rd, $Cond", + (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"csetm $Rd, $Cond", + (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinc $Rd, $Rn, $Cond", + (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinc $Rd, $Rn, $Cond", + (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinv $Rd, $Rn, $Cond", + (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinv $Rd, $Rn, $Cond", + (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cneg $Rd, $Rn, $Cond", + (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cneg $Rd, $Rn, $Cond", + (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; + +// Finally some helper patterns. + +// For CSET (a.k.a. zero-extension of icmp) +def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), + (CSINCwwwc WZR, WZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), + (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>; + +def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), + (CSINCxxxc XZR, XZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), + (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>; + +// For CSETM (a.k.a. sign-extension of icmp) +def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond), + (CSINVwwwc WZR, WZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond), + (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>; + +def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond), + (CSINVxxxc XZR, XZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond), + (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>; + +// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of +// commutativity. 
The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def :Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def :Pat<(A64select_cc NZCV, (not GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSINVwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSINVxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def :Pat<(A64select_cc NZCV, (ineg GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSNEGwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSNEGxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define a unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
+
+class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
+ list<dag> patterns, RegisterClass GPRrc,
+ InstrItinClass itin>:
+ A64I_dp_1src<sf, opcode, (outs GPRrc:$Rd), (ins GPRrc:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"), patterns, itin>;
+
+multiclass A64I_dp_1src<bits<6> opcode, string asmop> {
+ let neverHasSideEffects = 1 in {
+ def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
+ def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
+ }
+}
+
+defm RBIT : A64I_dp_1src<0b000000, "rbit">;
+defm CLS : A64I_dp_1src<0b000101, "cls">;
+defm CLZ : A64I_dp_1src<0b000100, "clz">;
+
+def : Pat<(ctlz GPR32:$Rn), (CLZww GPR32:$Rn)>;
+def : Pat<(ctlz GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+def : Pat<(ctlz_zero_undef GPR32:$Rn), (CLZww GPR32:$Rn)>;
+def : Pat<(ctlz_zero_undef GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+
+def : Pat<(cttz GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
+def : Pat<(cttz GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
+def : Pat<(cttz_zero_undef GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
+def : Pat<(cttz_zero_undef GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
+
+
+def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
+ [(set GPR32:$Rd, (bswap GPR32:$Rn))],
+ GPR32, NoItinerary>;
+def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
+ [(set GPR64:$Rd, (bswap GPR64:$Rn))],
+ GPR64, NoItinerary>;
+def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
+ [(set GPR64:$Rd, (bswap (rotr GPR64:$Rn, (i64 32))))],
+ GPR64, NoItinerary>;
+def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
+ [(set GPR32:$Rd, (bswap (rotr GPR32:$Rn, (i64 16))))],
+ GPR32,
+ NoItinerary>;
+def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, LSR, ASR, ROR
+
+class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
+ RegisterClass GPRsp,
+ InstrItinClass itin>:
+ A64I_dp_2src<sf, opcode, (outs GPRsp:$Rd), (ins GPRsp:$Rn, GPRsp:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"), patterns, itin>;
+
+multiclass dp_2src_zext<bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+
[(set GPR32:$Rd, (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))], + GPR32, + NoItinerary>; + def xxx : dp_2src_impl<0b1, + opcode, + asmop, + [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))], + GPR64, + NoItinerary>; +} + + +multiclass dp_2src opcode, string asmop, SDPatternOperator op> { + def www : dp_2src_impl<0b0, + opcode, + asmop, + [(set GPR32:$Rd, (op GPR32:$Rn, GPR32:$Rm))], + GPR32, + NoItinerary>; + def xxx : dp_2src_impl<0b1, + opcode, + asmop, + [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))], + GPR64, + NoItinerary>; +} + +// Here we define the data processing 2 source instructions. +defm UDIV : dp_2src<0b000010, "udiv", udiv>; +defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; + +defm LSLV : dp_2src_zext<0b001000, "lsl", shl>; +defm LSRV : dp_2src_zext<0b001001, "lsr", srl>; +defm ASRV : dp_2src_zext<0b001010, "asr", sra>; +defm RORV : dp_2src_zext<0b001011, "ror", rotr>; + +// Extra patterns for an incoming 64-bit value for a 32-bit +// operation. Since the LLVM operations are undefined (as in C) if the +// RHS is out of range, it's perfectly permissible to discard the high +// bits of the GPR64. +def : Pat<(shl GPR32:$Rn, GPR64:$Rm), + (LSLVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>; +def : Pat<(srl GPR32:$Rn, GPR64:$Rm), + (LSRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>; +def : Pat<(sra GPR32:$Rn, GPR64:$Rm), + (ASRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>; +def : Pat<(rotr GPR32:$Rn, GPR64:$Rm), + (RORVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>; + +// Here we define the aliases for the data processing 2 source instructions. +def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">; +def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">; +def ASR_menmonic : MnemonicAlias<"asrv", "asr">; +def ROR_menmonic : MnemonicAlias<"rorv", "ror">; + +//===----------------------------------------------------------------------===// +// Data Processing (3 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH +// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL + +class A64I_dp3_4operand opcode, RegisterClass AccReg, + RegisterClass SrcReg, string asmop, dag pattern> + : A64I_dp3 +{ + RegisterClass AccGPR = AccReg; + RegisterClass SrcGPR = SrcReg; +} + +def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, GPR32, "madd", + (add GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>; +def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, GPR64, "madd", + (add GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>; + +def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, GPR32, "msub", + (sub GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>; +def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, GPR64, "msub", + (sub GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>; + +def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, GPR32, "smaddl", + (add GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>; +def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, GPR32, "smsubl", + (sub GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>; + +def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl", + (add GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; +def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl", + (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; + +let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in +{ + def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm), + "umulh\t$Rd, $Rn, $Rm", + 
[(set GPR64:$Rd, (mulhu GPR64:$Rn, GPR64:$Rm))], + NoItinerary>; + + def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm), + "smulh\t$Rd, $Rn, $Rm", + [(set GPR64:$Rd, (mulhs GPR64:$Rn, GPR64:$Rm))], + NoItinerary>; +} + +multiclass A64I_dp3_3operand +{ + def : InstAlias; + + def : Pat; +} + +defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul GPR32:$Rn, GPR32:$Rm)>; +defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul GPR64:$Rn, GPR64:$Rm)>; + +defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR, + (sub 0, (mul GPR32:$Rn, GPR32:$Rm))>; +defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, + (sub 0, (mul GPR64:$Rn, GPR64:$Rm))>; + +defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, + (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>; +defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, + (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>; + +defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, + (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>; +defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, + (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; + + +//===----------------------------------------------------------------------===// +// Exception generation +//===----------------------------------------------------------------------===// +// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 + +def uimm16_asmoperand : AsmOperandClass +{ + let Name = "UImm16"; + let PredicateMethod = "isUImm<16>"; + let RenderMethod = "addImmOperands"; +} + +def uimm16 : Operand +{ + let ParserMatchClass = uimm16_asmoperand; +} + +class A64I_exceptImpl opc, bits<2> ll, string asmop> + : A64I_exception +{ + let isBranch = 1; + let isTerminator = 1; +} + +def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">; +def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">; +def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">; +def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">; +def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">; + +def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">; +def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">; +def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">; + +// The immediate is optional for the DCPS instructions, defaulting to 0. 
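+// For example, a bare "dcps1" assembles to the same encoding as "dcps1 #0";
+// the aliases below supply the default operand.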
+def : InstAlias<"dcps1", (DCPS1i 0)>; +def : InstAlias<"dcps2", (DCPS2i 0)>; +def : InstAlias<"dcps3", (DCPS3i 0)>; + +//===----------------------------------------------------------------------===// +// Extract (immediate) +//===----------------------------------------------------------------------===// +// Contains: EXTR + alias ROR + +def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0, + (outs GPR32:$Rd), + (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB), + "extr\t$Rd, $Rn, $Rm, $LSB", + [(set GPR32:$Rd, + (A64Extr GPR32:$Rn, GPR32:$Rm, imm:$LSB))], + NoItinerary>; +def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1, + (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB), + "extr\t$Rd, $Rn, $Rm, $LSB", + [(set GPR64:$Rd, + (A64Extr GPR64:$Rn, GPR64:$Rm, imm:$LSB))], + NoItinerary>; + +def : InstAlias<"ror $Rd, $Rs, $LSB", + (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>; +def : InstAlias<"ror $Rd, $Rs, $LSB", + (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>; + +def : Pat<(rotr GPR32:$Rn, bitfield32_imm:$LSB), + (EXTRwwwi GPR32:$Rn, GPR32:$Rn, bitfield32_imm:$LSB)>; +def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB), + (EXTRxxxi GPR64:$Rn, GPR64:$Rn, bitfield64_imm:$LSB)>; + +//===----------------------------------------------------------------------===// +// Floating-point compare instructions +//===----------------------------------------------------------------------===// +// Contains: FCMP, FCMPE + +def fpzero_asmoperand : AsmOperandClass +{ + let Name = "FPZero"; + let ParserMethod = "ParseFPImmOperand"; +} + +def fpz32 : Operand, ComplexPattern +{ + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; +} + +def fpz64 : Operand, ComplexPattern +{ + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; +} + +multiclass A64I_fpcmpSignal type, bit imm, dag ins, string asmop2, + dag pattern> +{ + def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, + (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2), + [pattern], NoItinerary> + { + let Defs = [NZCV]; + } + + def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, + (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2), + [], NoItinerary> + { + let Defs = [NZCV]; + } +} + +defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm", + (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>; +defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm", + (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>; + +// What would be Rm should be written as 0, but anything is valid for +// disassembly so we can't set the bits +let PostEncoderMethod = "fixFCMPImm" in +{ + defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm", + (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>; + + defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm", + (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>; +} + + +//===----------------------------------------------------------------------===// +// Floating-point conditional compare instructions +//===----------------------------------------------------------------------===// +// Contains: FCCMP, FCCMPE + +class A64I_fpccmpImpl type, bit op, RegisterClass FPR, string asmop> + : A64I_fpccmp<0b0, 0b0, type, op, + (outs), + (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), + [], NoItinerary> +{ + let Defs = [NZCV]; +} + +def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, 
"fccmp">; +def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">; +def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">; +def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">; + +//===----------------------------------------------------------------------===// +// Floating-point conditional select instructions +//===----------------------------------------------------------------------===// +// Contains: FCSEL + +let Uses = [NZCV] in +{ + def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd), + (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond), + "fcsel\t$Rd, $Rn, $Rm, $Cond", + [(set FPR32:$Rd, + (simple_select (f32 FPR32:$Rn), + FPR32:$Rm))], + NoItinerary>; + + + def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd), + (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond), + "fcsel\t$Rd, $Rn, $Rm, $Cond", + [(set FPR64:$Rd, + (simple_select (f64 FPR64:$Rn), + FPR64:$Rm))], + NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (1 source) +//===----------------------------------------------------------------------===// +// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI]. + +def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val), + [{ (void)N; return false; }]>; + +// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d" +// syntax. Default to no pattern because most are odd enough not to have one. +multiclass A64I_fpdp1sizes opcode, string asmstr, + SDPatternOperator opnode = FPNoUnop> +{ + def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn), + !strconcat(asmstr, "\t$Rd, $Rn"), + [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))], + NoItinerary>; + + def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn), + !strconcat(asmstr, "\t$Rd, $Rn"), + [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn))], + NoItinerary>; +} + +defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">; +defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>; +defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>; +defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>; + +defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">; +defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>; +defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>; +defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>; +defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">; +defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>; +defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>; + +// The FCVT instrucitons have different source and destination register-types, +// but the fields are uniform everywhere a D-register (say) crops up. Package +// this information in a Record. 
+class FCVTRegType fld, ValueType vt> +{ + RegisterClass Class = rc; + ValueType VT = vt; + bit t1 = fld{1}; + bit t0 = fld{0}; +} + +def FCVT16 : FCVTRegType; +def FCVT32 : FCVTRegType; +def FCVT64 : FCVTRegType; + +class A64I_fpdp1_fcvt + : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0}, + {0,0,0,1, DestReg.t1, DestReg.t0}, + (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn), + "fcvt\t$Rd, $Rn", + [(set (DestReg.VT DestReg.Class:$Rd), + (opnode (SrcReg.VT SrcReg.Class:$Rn)))], NoItinerary>; + +def FCVTds : A64I_fpdp1_fcvt; +def FCVThs : A64I_fpdp1_fcvt; +def FCVTsd : A64I_fpdp1_fcvt; +def FCVThd : A64I_fpdp1_fcvt; +def FCVTsh : A64I_fpdp1_fcvt; +def FCVTdh : A64I_fpdp1_fcvt; + + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (2 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL + +def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs), + [{ (void)N; return false; }]>; + +multiclass A64I_fpdp2sizes opcode, string asmstr, + SDPatternOperator opnode> +{ + def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode, + (outs FPR32:$Rd), + (ins FPR32:$Rn, FPR32:$Rm), + !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), + [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn, FPR32:$Rm))], + NoItinerary>; + + def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode, + (outs FPR64:$Rd), + (ins FPR64:$Rn, FPR64:$Rm), + !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), + [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn, FPR64:$Rm))], + NoItinerary>; +} + +let isCommutable = 1 in { + defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; + defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>; + + // No patterns for these. + defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>; + defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>; + defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>; + defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>; + + defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul", + PatFrag<(ops node:$lhs, node:$rhs), + (fneg (fmul node:$lhs, node:$rhs))> >; +} + +defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>; +defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>; + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (3 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: FMADD, FMSUB, FNMADD, FNMSUB + +def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma (fneg node:$Rn), node:$Rm, node:$Ra)>; +def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma node:$Rn, node:$Rm, (fneg node:$Ra))>; +def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>; + +class A64I_fpdp3Impl type, bit o1, bit o0, SDPatternOperator fmakind> + : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd), + (ins FPR:$Rn, FPR:$Rm, FPR:$Ra), + !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"), + [(set FPR:$Rd, (fmakind (VT FPR:$Rn), FPR:$Rm, FPR:$Ra))], + NoItinerary>; + +def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>; +def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>; +def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>; +def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>; + +def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>; +def FMSUBdddd : 
A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>; +def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>; +def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; + +//===----------------------------------------------------------------------===// +// Floating-point <-> fixed-point conversion instructions +//===----------------------------------------------------------------------===// +// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF + +// #1-#32 allowed, encoded as "64 - +def fixedpos_asmoperand_i32 : AsmOperandClass +{ + let Name = "CVTFixedPos32"; + let RenderMethod = "addCVTFixedPosOperands"; + let PredicateMethod = "isCVTFixedPos<32>"; +} + +// Also encoded as "64 - " but #1-#64 allowed. +def fixedpos_asmoperand_i64 : AsmOperandClass +{ + let Name = "CVTFixedPos64"; + let RenderMethod = "addCVTFixedPosOperands"; + let PredicateMethod = "isCVTFixedPos<64>"; +} + +// We need the cartesian product of f32/f64 i32/i64 operands for +// conversions: +// + Selection needs to use operands of correct floating type +// + Assembly parsing and decoding depend on integer width +class cvtfix_i32_op + : Operand, + ComplexPattern", [fpimm]> +{ + let ParserMatchClass = fixedpos_asmoperand_i32; + let DecoderMethod = "DecodeCVT32FixedPosOperand"; + let PrintMethod = "printCVTFixedPosOperand"; +} + +class cvtfix_i64_op + : Operand, + ComplexPattern", [fpimm]> +{ + let ParserMatchClass = fixedpos_asmoperand_i64; + let PrintMethod = "printCVTFixedPosOperand"; +} + +// Because of the proliferation of weird operands, it's not really +// worth going for a multiclass here. Oh well. + +class A64I_fptofix type, bits<3> opcode, + RegisterClass GPR, RegisterClass FPR, Operand scale_op, + string asmop, SDNode cvtop> + : A64I_fpfixed; + +def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, + cvtfix_i32_op, "fcvtzs", fp_to_sint>; +def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, + cvtfix_i64_op, "fcvtzs", fp_to_sint>; +def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, + cvtfix_i32_op, "fcvtzu", fp_to_uint>; +def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, + cvtfix_i64_op, "fcvtzu", fp_to_uint>; + +def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, + cvtfix_i32_op, "fcvtzs", fp_to_sint>; +def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, + cvtfix_i64_op, "fcvtzs", fp_to_sint>; +def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, + cvtfix_i32_op, "fcvtzu", fp_to_uint>; +def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, + cvtfix_i64_op, "fcvtzu", fp_to_uint>; + + +class A64I_fixtofp type, bits<3> opcode, + RegisterClass FPR, RegisterClass GPR, Operand scale_op, + string asmop, SDNode cvtop> + : A64I_fpfixed; + +def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, + cvtfix_i32_op, "scvtf", sint_to_fp>; +def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, + cvtfix_i64_op, "scvtf", sint_to_fp>; +def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, + cvtfix_i32_op, "ucvtf", uint_to_fp>; +def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, + cvtfix_i64_op, "ucvtf", uint_to_fp>; +def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, + cvtfix_i32_op, "scvtf", sint_to_fp>; +def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, + cvtfix_i64_op, "scvtf", sint_to_fp>; +def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, + cvtfix_i32_op, "ucvtf", uint_to_fp>; +def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, + cvtfix_i64_op, "ucvtf", 
uint_to_fp>; + +//===----------------------------------------------------------------------===// +// Floating-point <-> integer conversion instructions +//===----------------------------------------------------------------------===// +// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF + +class A64I_fpintI type, bits<2> rmode, bits<3> opcode, + RegisterClass DestPR, RegisterClass SrcPR, string asmop> + : A64I_fpint; + +multiclass A64I_fptointRM rmode, bit o2, string asmop> +{ + def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, GPR32, FPR32, asmop # "s">; + def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, GPR64, FPR32, asmop # "s">; + def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, GPR32, FPR32, asmop # "u">; + def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, GPR64, FPR32, asmop # "u">; + + def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, GPR32, FPR64, asmop # "s">; + def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, GPR64, FPR64, asmop # "s">; + def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, GPR32, FPR64, asmop # "u">; + def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, GPR64, FPR64, asmop # "u">; +} + +defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">; +defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">; +defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; +defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; +defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; + +def : Pat<(i32 (fp_to_sint FPR32:$Rn)), (FCVTZSws FPR32:$Rn)>; +def : Pat<(i64 (fp_to_sint FPR32:$Rn)), (FCVTZSxs FPR32:$Rn)>; +def : Pat<(i32 (fp_to_uint FPR32:$Rn)), (FCVTZUws FPR32:$Rn)>; +def : Pat<(i64 (fp_to_uint FPR32:$Rn)), (FCVTZUxs FPR32:$Rn)>; +def : Pat<(i32 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSwd FPR64:$Rn)>; +def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>; +def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>; +def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>; + +multiclass A64I_inttofp +{ + def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; + def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>; + def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>; + def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>; +} + +defm S : A64I_inttofp<0b0, "scvtf">; +defm U : A64I_inttofp<0b1, "ucvtf">; + +def : Pat<(f32 (sint_to_fp GPR32:$Rn)), (SCVTFsw GPR32:$Rn)>; +def : Pat<(f32 (sint_to_fp GPR64:$Rn)), (SCVTFsx GPR64:$Rn)>; +def : Pat<(f64 (sint_to_fp GPR32:$Rn)), (SCVTFdw GPR32:$Rn)>; +def : Pat<(f64 (sint_to_fp GPR64:$Rn)), (SCVTFdx GPR64:$Rn)>; +def : Pat<(f32 (uint_to_fp GPR32:$Rn)), (UCVTFsw GPR32:$Rn)>; +def : Pat<(f32 (uint_to_fp GPR64:$Rn)), (UCVTFsx GPR64:$Rn)>; +def : Pat<(f64 (uint_to_fp GPR32:$Rn)), (UCVTFdw GPR32:$Rn)>; +def : Pat<(f64 (uint_to_fp GPR64:$Rn)), (UCVTFdx GPR64:$Rn)>; + +def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; +def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; +def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; +def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; + +def : Pat<(i32 (bitconvert (f32 FPR32:$Rn))), (FMOVws FPR32:$Rn)>; +def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>; +def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>; +def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>; + +def lane1_asmoperand : AsmOperandClass +{ + let Name = "Lane1"; + let RenderMethod = "addImmOperands"; +} + +def lane1 : Operand +{ 
+ let ParserMatchClass = lane1_asmoperand; + let PrintMethod = "printBareImmOperand"; +} + +let DecoderMethod = "DecodeFMOVLaneInstruction" in +{ + def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110, + (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane), + "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>; + + def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111, + (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane), + "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>; +} + +def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]", + (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>; + +def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", + (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>; + +//===----------------------------------------------------------------------===// +// Floating-point immediate instructions +//===----------------------------------------------------------------------===// +// Contains: FMOV + +def fpimm_asmoperand : AsmOperandClass +{ + let Name = "FMOVImm"; + let ParserMethod = "ParseFPImmOperand"; +} + +// The MCOperand for these instructions are the encoded 8-bit values. +def SDXF_fpimm : SDNodeXFormgetValueAPF(), Imm8); + return CurDAG->getTargetConstant(Imm8, MVT::i32); +}]>; + +class fmov_operand + : Operand, + PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }], + SDXF_fpimm> +{ + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = fpimm_asmoperand; +} + +def fmov32_operand : fmov_operand; +def fmov64_operand : fmov_operand; + +class A64I_fpimm_impl type, RegisterClass Reg, ValueType VT, + Operand fmov_operand> + : A64I_fpimm<0b0, 0b0, type, 0b00000, + (outs Reg:$Rd), + (ins fmov_operand:$Imm8), + "fmov\t$Rd, $Imm8", + [(set (VT Reg:$Rd), fmov_operand:$Imm8)], + NoItinerary>; + +def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>; +def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; + +//===----------------------------------------------------------------------===// +// Load-register (literal) instructions +//===----------------------------------------------------------------------===// +// Contains: LDR, LDRSW, PRFM + +def ldrlit_label_asmoperand : AsmOperandClass +{ + let Name = "LoadLitLabel"; + let RenderMethod = "addLabelOperands<19, 4>"; +} + +def ldrlit_label : Operand +{ + let EncoderMethod = "getLoadLitLabelOpValue"; + + // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<19, 4>"; + let ParserMatchClass = ldrlit_label_asmoperand; + let OperandType = "OPERAND_PCREL"; +} + +// Various instructions take an immediate value (which can always be used), +// where some numbers have a symbolic name to make things easier. These operands +// and the associated functions abstract away the differences. 
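+// For example, for prefetch operands the name "pldl1keep" encodes as
+// immediate #0, so "prfm pldl1keep, [x0]" and "prfm #0, [x0]" assemble
+// identically (illustrative; the full mapping lives in the named mapper).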
+multiclass namedimm<string prefix, string mapper>
+{
+ def _asmoperand : AsmOperandClass
+ {
+ let Name = "NamedImm" # prefix;
+ let PredicateMethod = "isUImm";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+ }
+
+ def _op : Operand<i32>
+ {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+ let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+ }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+ list<dag> patterns = []>
+ : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+ "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in
+{
+ def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+ def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32,
+ [(set (f32 FPR32:$Rt), (load constpool:$Imm19))]>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64,
+ [(set (f64 FPR64:$Rt), (load constpool:$Imm19))]>;
+
+let mayLoad = 1 in
+{
+ def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+
+ def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins ldrlit_label:$Imm19),
+ "ldrsw\t$Rt, $Imm19",
+ [], NoItinerary>;
+
+ def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+ (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+ "prfm\t$Rt, $Imm19",
+ [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
+// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+// STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
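+// For example, "ldxr w0, [x1]" and "ldxr w0, [x1, #0]" are both accepted and
+// produce the same encoding.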
+def GPR64xsp0_asmoperand : AsmOperandClass +{ + let Name = "GPR64xsp0"; + let PredicateMethod = "isWrappedReg"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "ParseLSXAddressOperand"; +} + +def GPR64xsp0 : RegisterOperand +{ + let ParserMatchClass = GPR64xsp0_asmoperand; +} + +//===---------------------------------- +// Store-exclusive (releasing & normal) +//===---------------------------------- + +class A64I_SRexs_impl size, bits<3> opcode, string asm, dag outs, + dag ins, list pat, + InstrItinClass itin> : + A64I_LDSTex_stn { + let mayStore = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +} + +multiclass A64I_SRex opcode, string prefix> { + def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; + + def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [],NoItinerary>; + + def _word: A64I_SRexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, + (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; +defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">; + +//===---------------------------------- +// Loads +//===---------------------------------- + +class A64I_LRexs_impl size, bits<3> opcode, string asm, dag outs, + dag ins, list pat, + InstrItinClass itin> : + A64I_LDSTex_tn { + let mayLoad = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +multiclass A64I_LRex opcode> { + def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _word: A64I_LRexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_LRexs_impl<0b11, opcode, asmstr, + (outs GPR64:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm LDXR : A64I_LRex<"ldxr", 0b000>; +defm LDAXR : A64I_LRex<"ldaxr", 0b001>; +defm LDAR : A64I_LRex<"ldar", 0b101>; + +class acquiring_load + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast(N)->getOrdering() == Acquire; +}]>; + +def atomic_load_acquire_8 : acquiring_load; +def atomic_load_acquire_16 : acquiring_load; +def atomic_load_acquire_32 : acquiring_load; +def atomic_load_acquire_64 : acquiring_load; + +def : Pat<(atomic_load_acquire_8 GPR64xsp:$Rn), (LDAR_byte GPR64xsp0:$Rn)>; +def : Pat<(atomic_load_acquire_16 GPR64xsp:$Rn), (LDAR_hword GPR64xsp0:$Rn)>; +def : Pat<(atomic_load_acquire_32 GPR64xsp:$Rn), (LDAR_word GPR64xsp0:$Rn)>; +def : Pat<(atomic_load_acquire_64 GPR64xsp:$Rn), (LDAR_dword GPR64xsp0:$Rn)>; + +//===---------------------------------- +// Store-release (no exclusivity) +//===---------------------------------- + +class A64I_SLexs_impl size, bits<3> opcode, string asm, dag outs, + dag ins, list pat, + InstrItinClass itin> : + A64I_LDSTex_tn { + let mayStore = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +class releasing_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + return cast(N)->getOrdering() == Release; +}]>; + +def atomic_store_release_8 : releasing_store; +def atomic_store_release_16 : releasing_store; +def atomic_store_release_32 : releasing_store; +def atomic_store_release_64 : 
releasing_store; + +multiclass A64I_SLex opcode, string prefix> { + def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_8 GPR64xsp0:$Rn, GPR32:$Rt)], + NoItinerary>; + + def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_16 GPR64xsp0:$Rn, GPR32:$Rt)], + NoItinerary>; + + def _word: A64I_SLexs_impl<0b10, opcode, asmstr, + (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_32 GPR64xsp0:$Rn, GPR32:$Rt)], + NoItinerary>; + + def _dword: A64I_SLexs_impl<0b11, opcode, asmstr, + (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_64 GPR64xsp0:$Rn, GPR64:$Rt)], + NoItinerary>; +} + +defm STLR : A64I_SLex<"stlr", 0b101, "STLR">; + +//===---------------------------------- +// Store-exclusive pair (releasing & normal) +//===---------------------------------- + +class A64I_SPexs_impl size, bits<3> opcode, string asm, dag outs, + dag ins, list pat, + InstrItinClass itin> : + A64I_LDSTex_stt2n +{ + let mayStore = 1; +} + + +multiclass A64I_SPex opcode> { + def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs), + (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2, + GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs), + (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2, + GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm STXP : A64I_SPex<"stxp", 0b010>; +defm STLXP : A64I_SPex<"stlxp", 0b011>; + +//===---------------------------------- +// Load-exclusive pair (acquiring & normal) +//===---------------------------------- + +class A64I_LPexs_impl size, bits<3> opcode, string asm, dag outs, + dag ins, list pat, + InstrItinClass itin> : + A64I_LDSTex_tt2n { + let mayLoad = 1; + let DecoderMethod = "DecodeLoadPairExclusiveInstruction"; + let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; +} + +multiclass A64I_LPex opcode> { + def _word: A64I_LPexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rt, GPR32:$Rt2), + (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, + (outs GPR64:$Rt, GPR64:$Rt2), + (ins GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm LDXP : A64I_LPex<"ldxp", 0b010>; +defm LDAXP : A64I_LPex<"ldaxp", 0b011>; + +//===----------------------------------------------------------------------===// +// Load-store register (unscaled immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: LDURB, LDURH, LDRUSB, LDRUSH, LDRUSW, STUR, STURB, STURH and PRFUM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (register offset) instructions +//===----------------------------------------------------------------------===// +// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (unsigned immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (immediate post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW +// +// and +// 
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+// 1. A base register.
+// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
+// would have '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_uimm12<int MemSize, string prefix>
+{
+ def uimm12_asmoperand : AsmOperandClass
+ {
+ let Name = "OffsetUImm12_" # MemSize;
+ let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+ let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+ }
+
+ // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+ // complicates things beyond TableGen's ken.
+ def uimm12 : Operand<i64>,
+ ComplexPattern">
+ {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+ let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+ let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+ }
+}
+
+defm byte_ : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_ : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass
+{
+ let Name = "SImm9";
+ let PredicateMethod = "isSImm<9>";
+ let RenderMethod = "addSImmOperands<9>";
+}
+
+def simm9 : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+ SDXF_simm9>
+{
+ let PrintMethod = "printOffsetSImm9Operand";
+ let ParserMatchClass = simm9_asmoperand;
+}
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+// [, {, {}}]
+//
+// The essential semantics are:
+// + is a shift: # or #0
+// + can be W or X.
+// + If is W, can be UXTW or SXTW +// + If is X, can be LSL or SXTX +// +// The trickiest of those constraints is that Rm can be either GPR32 or GPR64, +// which will need separate instructions for LLVM type-consistency. We'll also +// need separate operands, of course. +multiclass regexts +{ + def regext_asmoperand : AsmOperandClass + { + let Name = "AddrRegExtend_" # MemSize # "_" # Rm; + let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">"; + let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; + } + + def regext : Operand + { + let PrintMethod + = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">"; + + let DecoderMethod = "DecodeAddrRegExtendOperand"; + let ParserMatchClass + = !cast(prefix # regext_asmoperand); + } +} + +multiclass regexts_wx +{ + // Rm is an X-register if LSL or SXTX are specified as the shift. + defm Xm_ : regexts; + + // Rm is a W-register if UXTW or SXTW are specified as the shift. + defm Wm_ : regexts; +} + +defm byte_ : regexts_wx<1, "byte_">; +defm hword_ : regexts_wx<2, "hword_">; +defm word_ : regexts_wx<4, "word_">; +defm dword_ : regexts_wx<8, "dword_">; +defm qword_ : regexts_wx<16, "qword_">; + + +//===------------------------------ +// 2. The instructions themselves. +//===------------------------------ + +// We have the following instructions to implement: +// | | B | H | W | X | +// |-----------------+-------+-------+-------+--------| +// | unsigned str | STRB | STRH | STR | STR | +// | unsigned ldr | LDRB | LDRH | LDR | LDR | +// | signed ldr to W | LDRSB | LDRSH | - | - | +// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) | + +// This will instantiate the LDR/STR instructions you'd expect to use for an +// unsigned datatype (first two rows above) or floating-point register, which is +// reasonably uniform across all access sizes. + + +//===------------------------------ +// 2.1 Regular instructions +//===------------------------------ + +// This class covers the basic unsigned or irrelevantly-signed loads and stores, +// to general-purpose and floating-point registers. + +class AddrParams +{ + Operand uimm12 = !cast(prefix # "_uimm12"); + + Operand regextWm = !cast(prefix # "_Wm_regext"); + Operand regextXm = !cast(prefix # "_Xm_regext"); +} + +def byte_addrparams : AddrParams<"byte">; +def hword_addrparams : AddrParams<"hword">; +def word_addrparams : AddrParams<"word">; +def dword_addrparams : AddrParams<"dword">; +def qword_addrparams : AddrParams<"qword">; + +multiclass A64I_LDRSTR_unsigned size, bit v, + bit high_opc, string asmsuffix, + RegisterClass GPR, AddrParams params> +{ + // Unsigned immediate + def _STR : A64I_LSunsigimm + { + let mayStore = 1; + } + def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", + (!cast(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSunsigimm + { + let mayLoad = 1; + } + def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", + (!cast(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + // Register offset (four of these: load/store and Wm/Xm). 
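+// For example (illustrative): "ldr w0, [x1, w2, uxtw #2]" zero-extends w2,
+// scales it by the 4-byte access size (shift #2) and loads from
+// x1 + (zext(w2) << 2).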
+ let mayLoad = 1 in + { + def _Wm_RegOffset_LDR : A64I_LSregoff; + + def _Xm_RegOffset_LDR : A64I_LSregoff; + } + def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", + (!cast(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + let mayStore = 1 in + { + def _Wm_RegOffset_STR : A64I_LSregoff; + + def _Xm_RegOffset_STR : A64I_LSregoff; + } + def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", + (!cast(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + // Unaligned immediate + def _STUR : A64I_LSunalimm + { + let mayStore = 1; + } + def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", + (!cast(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _LDUR : A64I_LSunalimm + { + let mayLoad = 1; + } + def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", + (!cast(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + // Post-indexed + def _PostInd_STR : A64I_LSpostind + { + let Constraints = "$Rn = $Rn_wb"; + let mayStore = 1; + + // Decoder only needed for unpredictability checking (FIXME). + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def _PostInd_LDR : A64I_LSpostind + { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + // Pre-indexed + def _PreInd_STR : A64I_LSpreind + { + let Constraints = "$Rn = $Rn_wb"; + let mayStore = 1; + + // Decoder only needed for unpredictability checking (FIXME). + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def _PreInd_LDR : A64I_LSpreind + { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + +} + +// STRB/LDRB: First define the instructions +defm LS8 + : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>; + +// STRH/LDRH +defm LS16 + : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>; + + +// STR/LDR to/from a W register +defm LS32 + : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>; + +// STR/LDR to/from an X register +defm LS64 + : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; + +// STR/LDR to/from a B register +defm LSFP8 + : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; + +// STR/LDR to/from an H register +defm LSFP16 + : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>; + +// STR/LDR to/from an S register +defm LSFP32 + : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>; +// STR/LDR to/from a D register +defm LSFP64 + : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; +// STR/LDR to/from a Q register +defm LSFP128 + : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, qword_addrparams>; + +//===------------------------------ +// 2.3 Signed loads +//===------------------------------ + +// Byte and half-word signed loads can both go into either an X or a W register, +// so it's worth factoring out. Signed word loads don't fit because there is no +// W version. 
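+// For example, "ldrsb w0, [x1]" and "ldrsb x0, [x1]" both exist (sign-extend
+// a byte to 32 or 64 bits), so the multiclass below creates a "w" and an "x"
+// variant for each addressing mode.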
+multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, + string prefix> +{ + // Unsigned offset + def w : A64I_LSunsigimm + { + let mayLoad = 1; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", + (!cast(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def x : A64I_LSunsigimm + { + let mayLoad = 1; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", + (!cast(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; + + // Register offset + let mayLoad = 1 in + { + def w_Wm_RegOffset : A64I_LSregoff; + + def w_Xm_RegOffset : A64I_LSregoff; + + def x_Wm_RegOffset : A64I_LSregoff; + + def x_Xm_RegOffset : A64I_LSregoff; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", + (!cast(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", + (!cast(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + + let mayLoad = 1 in + { + // Unaligned offset + def w_U : A64I_LSunalimm; + + def x_U : A64I_LSunalimm; + + + // Post-indexed + def w_PostInd : A64I_LSpostind + { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def x_PostInd : A64I_LSpostind + { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + // Pre-indexed + def w_PreInd : A64I_LSpreind + { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def x_PreInd : A64I_LSpreind + { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + } // let mayLoad = 1 +} + +// LDRSB +defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">; +// LDRSH +defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">; + +// LDRSW: load a 32-bit register, sign-extending to 64-bits. 
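+// For example, "ldrsw x0, [x1, #4]" loads the word at x1+4 and sign-extends
+// it into x0.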
+def LDRSWx + : A64I_LSunsigimm<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, word_uimm12:$UImm12), + "ldrsw\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> +{ + let mayLoad = 1; +} +def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +let mayLoad = 1 in +{ + def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), + "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), + "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; +} +def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", + (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; + + +def LDURSWx + : A64I_LSunalimm<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldursw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> +{ + let mayLoad = 1; +} +def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +def LDRSWx_PostInd + : A64I_LSpostind<0b10, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrsw\t$Rt, [$Rn], $SImm9", + [], NoItinerary> +{ + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; +} + +def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrsw\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> +{ + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; +} + +//===------------------------------ +// 2.4 Prefetch operations +//===------------------------------ + +def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), + "prfm\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> +{ + let mayLoad = 1; +} +def : InstAlias<"prfm $Rt, [$Rn]", + (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; + +let mayLoad = 1 in +{ + def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, + GPR32:$Rm, dword_Wm_regext:$Ext), + "prfm\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, dword_Xm_regext:$Ext), + "prfm\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; +} + +def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + +def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "prfum\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> +{ + let mayLoad = 1; +} +def : InstAlias<"prfum $Rt, [$Rn]", + (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register (unprivileged) instructions +//===----------------------------------------------------------------------===// +// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH + +// These instructions very much mirror the "unscaled immediate" loads, but since +// there are no floating-point variants we need to split them out into their own +// section to avoid instantiation of "ldtr d0, [sp]" etc. 
+ +multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, + string prefix> +{ + def _UnPriv_STR : A64I_LSunpriv + { + let mayStore = 1; + } + + def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", + (!cast(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _UnPriv_LDR : A64I_LSunpriv + { + let mayLoad = 1; + } + + def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", + (!cast(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// STTRB/LDTRB: First define the instructions +defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; + +// STTRH/LDTRH +defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; + +// STTR/LDTR to/from a W register +defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; + +// STTR/LDTR to/from an X register +defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; + +// Now a class for the signed instructions that can go to either 32 or 64 +// bits... +multiclass A64I_LDTR_signed size, string asmopcode, string prefix> +{ + let mayLoad = 1 in + { + def w : A64I_LSunpriv; + + def x : A64I_LSunpriv; + } + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// LDTRSB +defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; +// LDTRSH +defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; + +// And finally LDTRSW which only goes to 64 bits. +def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrsw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> +{ + let mayLoad = 1; +} +def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register pair (offset) instructions +//===----------------------------------------------------------------------===// +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (pre-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store non-temporal register pair (offset) instructions +//===----------------------------------------------------------------------===// +// Contains: STNP, LDNP + + +// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to +// know the access size via some means. An isolated operand does not have this +// information unless told from here, which means we need separate tablegen +// Operands for each access size. This multiclass takes care of instantiating +// the correct template functions in the rest of the backend. + +multiclass offsets_simm7 +{ + // The bare signed 7-bit immediate is used in post-indexed instructions, but + // because of the scaling performed a generic "simm7" operand isn't + // appropriate here either. 
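
A rough standalone C++ sketch (helper name mine, not from the patch) of the scaling rule that the isSImm7Scaled and addSImm7ScaledOperands hooks named below presumably enforce: a register-pair byte offset must divide exactly by the access size and, once scaled, fit in seven signed bits.

#include <cstdint>
#include <cstdio>

// True, with Imm7 set, if ByteOffset is encodable as the scaled signed
// 7-bit immediate of an LDP/STP with the given access size in bytes.
bool encodeSImm7Scaled(int64_t ByteOffset, int MemSize, int &Imm7) {
  if (ByteOffset % MemSize != 0) return false;   // must be a multiple
  int64_t Scaled = ByteOffset / MemSize;
  if (Scaled < -64 || Scaled > 63) return false; // signed 7-bit range
  Imm7 = (int)Scaled;
  return true;
}

int main() {
  int Imm7;
  if (encodeSImm7Scaled(16, 8, Imm7))   // "ldp x0, x1, [sp, #16]" -> 2
    printf("imm7 = %d\n", Imm7);
  printf("#12 with 8-byte regs encodable: %d\n",
         encodeSImm7Scaled(12, 8, Imm7));  // 0: not a multiple of 8
  return 0;
}
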
+ def simm7_asmoperand : AsmOperandClass + { + let Name = "SImm7_Scaled" # MemSize; + let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; + let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; + } + + def simm7 : Operand + { + let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; + let ParserMatchClass = !cast(prefix # "simm7_asmoperand"); + } +} + +defm word_ : offsets_simm7<"4", "word_">; +defm dword_ : offsets_simm7<"8", "dword_">; +defm qword_ : offsets_simm7<"16", "qword_">; + +multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, + Operand simm7, string prefix> +{ + def _STR : A64I_LSPoffset + { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stp $Rt, $Rt2, [$Rn]", + (!cast(prefix # "_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSPoffset + { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", + (!cast(prefix # "_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _PostInd_STR : A64I_LSPpostind + { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + + // Decoder only needed for unpredictability checking (FIXME). + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PostInd_LDR : A64I_LSPpostind + { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PreInd_STR : A64I_LSPpreind + { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PreInd_LDR : A64I_LSPpreind + { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _NonTemp_STR : A64I_LSPnontemp + { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stnp $Rt, $Rt2, [$Rn]", + (!cast(prefix # "_NonTemp_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _NonTemp_LDR : A64I_LSPnontemp + { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]", + (!cast(prefix # "_NonTemp_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + +} + + +defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; +defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; +defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; +defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; +defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, "LSFPPair128">; + + +def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> +{ + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} +def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]", + (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>; + +def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> +{ + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} + +def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> +{ + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = 
"DecodeLDSTPairInstruction"; +} + +//===----------------------------------------------------------------------===// +// Logical (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV + +multiclass logical_imm_operands +{ + def _asmoperand : AsmOperandClass + { + let Name = "LogicalImm" # note # size; + let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; + let RenderMethod = "addLogicalImmOperands<" # size # ">"; + } + + def _operand + : Operand, ComplexPattern + { + let ParserMatchClass = !cast(prefix # "_asmoperand"); + let PrintMethod = "printLogicalImmOperand<" # size # ">"; + let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; + } +} + +defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>; +defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; + +// The mov versions only differ in assembly parsing, where they +// exclude values representable with either MOVZ or MOVN. +defm logical_imm32_mov + : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; +defm logical_imm64_mov + : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; + + +multiclass A64I_logimmSizes opc, string asmop, SDNode opnode> +{ + def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set GPR32wsp:$Rd, + (opnode GPR32:$Rn, logical_imm32_operand:$Imm))], + NoItinerary>; + + def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set GPR64xsp:$Rd, + (opnode GPR64:$Rn, logical_imm64_operand:$Imm))], + NoItinerary>; +} + +defm AND : A64I_logimmSizes<0b00, "and", and>; +defm ORR : A64I_logimmSizes<0b01, "orr", or>; +defm EOR : A64I_logimmSizes<0b10, "eor", xor>; + +let Defs = [NZCV] in +{ + def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; + + def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; +} + + +def : InstAlias<"tst $Rn, $Imm", + (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; +def : InstAlias<"tst $Rn, $Imm", + (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>; + +//===----------------------------------------------------------------------===// +// Logical (shifted register) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV + +// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" +// behaves differently for unsigned comparisons, so we defensively only allow +// signed or n/a as the operand. In practice "unsigned greater than 0" is "not +// equal to 0" and LLVM gives us this. +def signed_cond : PatLeaf<(cond), [{ + return !isUnsignedIntSetCC(N->get()); +}]>; + + +// These instructions share their "shift" operands with add/sub (shifted +// register instructions). They are defined there. + +// N.b. the commutable parameter is just !N. It will be first against the wall +// when the revolution comes. 
+multiclass logical_shifts opc, + bit N, bit commutable, + string asmop, SDPatternOperator opfrag, string sty, + RegisterClass GPR, list defs> +{ + let isCommutable = commutable, Defs = defs in { + def _lsl : A64I_logicalshift("lsl_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm, + !cast("lsl_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_logicalshift("lsr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm, + !cast("lsr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_logicalshift("asr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm, + !cast("asr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _ror : A64I_logicalshift("ror_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (rotr GPR:$Rm, + !cast("ror_operand_" # sty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias(prefix # "_lsl") GPR:$Rd, GPR:$Rn, + GPR:$Rm, 0)>; + + def : Pat<(opfrag GPR:$Rn, GPR:$Rm), + (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +multiclass logical_sizes opc, bit N, bit commutable, + string asmop, SDPatternOperator opfrag, + list defs> +{ + defm xxx : logical_shifts; + defm www : logical_shifts; +} + + +defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; +defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; +defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; +defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", + PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), + [{ (void)N; return false; }]>, + [NZCV]>; + +defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs))>, []>; +defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", + PatFrag<(ops node:$lhs, node:$rhs), + (or node:$lhs, (not node:$rhs))>, []>; +defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", + PatFrag<(ops node:$lhs, node:$rhs), + (xor node:$lhs, (not node:$rhs))>, []>; +defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs)), + [{ (void)N; return false; }]>, + [NZCV]>; + +multiclass tst_shifts +{ + let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { + def _lsl : A64I_logicalshift("lsl_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (shl GPR:$Rm, + !cast("lsl_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + + def _lsr : A64I_logicalshift("lsr_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (srl GPR:$Rm, + !cast("lsr_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _asr : A64I_logicalshift("asr_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (sra GPR:$Rm, + !cast("asr_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _ror : A64I_logicalshift("ror_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (rotr GPR:$Rm, + !cast("ror_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + } + + def _noshift : InstAlias<"tst $Rn, $Rm", + (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(A64setcc (and GPR:$Rn, GPR:$Rm), 0, signed_cond), + (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +defm 
TSTxx : tst_shifts<"TSTxx", 0b1, "i64", GPR64>; +defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>; + + +multiclass mvn_shifts +{ + let isCommutable = 0, Rn = 0b11111 in { + def _lsl : A64I_logicalshift("lsl_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (shl GPR:$Rm, + !cast("lsl_operand_" # sty):$Imm6)))], + NoItinerary>; + + + def _lsr : A64I_logicalshift("lsr_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (srl GPR:$Rm, + !cast("lsr_operand_" # sty):$Imm6)))], + NoItinerary>; + + def _asr : A64I_logicalshift("asr_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (sra GPR:$Rm, + !cast("asr_operand_" # sty):$Imm6)))], + NoItinerary>; + + def _ror : A64I_logicalshift("ror_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (rotr GPR:$Rm, + !cast("lsl_operand_" # sty):$Imm6)))], + NoItinerary>; + } + + def _noshift : InstAlias<"mvn $Rn, $Rm", + (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(not GPR:$Rm), + (!cast(prefix # "_lsl") GPR:$Rm, 0)>; +} + +defm MVNxx : mvn_shifts<"MVNxx", 0b1, "i64", GPR64>; +defm MVNww : mvn_shifts<"MVNww", 0b0, "i32", GPR32>; + +def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; +def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; + +//===----------------------------------------------------------------------===// +// Move wide (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: MOVN, MOVZ, MOVK + MOV aliases + +// A wide variety of different relocations are needed for variants of these +// instructions, so it turns out that we need a different operand for all of +// them. +multiclass movw_operands +{ + def _imm_asmoperand : AsmOperandClass + { + let Name = instname # width # "Shifted" # shift; + let PredicateMethod = "is" # instname # width # "Imm"; + let RenderMethod = "addMoveWideImmOperands"; + + let ParserMethod = "ParseImmWithLSLOperand"; + } + + def _imm : Operand + { + let ParserMatchClass = !cast(prefix # "_imm_asmoperand"); + let PrintMethod = "printMoveWideImmOperand"; + let EncoderMethod = "getMoveWideImmOpValue"; + let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">"; + + let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); + } +} + +defm movn32 : movw_operands<"movn32", "MOVN", 32>; +defm movn64 : movw_operands<"movn64", "MOVN", 64>; +defm movz32 : movw_operands<"movz32", "MOVZ", 32>; +defm movz64 : movw_operands<"movz64", "MOVZ", 64>; +defm movk32 : movw_operands<"movk32", "MOVK", 32>; +defm movk64 : movw_operands<"movk64", "MOVK", 64>; + +multiclass A64I_movwSizes opc, string asmop, dag ins32bit, dag ins64bit> +{ + + def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, + !strconcat(asmop, "\t$Rd, $FullImm"), + [], NoItinerary> + { + bits<18> FullImm; + let UImm16 = FullImm{15-0}; + let Shift = FullImm{17-16}; + } + + def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, + !strconcat(asmop, "\t$Rd, $FullImm"), + [], NoItinerary> + { + bits<18> FullImm; + let UImm16 = FullImm{15-0}; + let Shift = FullImm{17-16}; + } +} + +let isMoveImm = 1, isReMaterializable = 1, + isAsCheapAsAMove = 1, neverHasSideEffects = 1 in +{ + defm MOVN : A64I_movwSizes<0b00, "movn", + (ins movn32_imm:$FullImm), + (ins movn64_imm:$FullImm)>; + + // Some relocations are able to convert between a MOVZ and a MOVN. 
If these + // are applied the instruction must be emitted with the corresponding bits as + // 0, which means a MOVZ needs to override that bit from the default. + let PostEncoderMethod = "fixMOVZ" in + defm MOVZ : A64I_movwSizes<0b10, "movz", + (ins movz32_imm:$FullImm), + (ins movz64_imm:$FullImm)>; +} + +let Constraints = "$src = $Rd" in +defm MOVK : A64I_movwSizes<0b11, "movk", + (ins GPR32:$src, movk32_imm:$FullImm), + (ins GPR64:$src, movk64_imm:$FullImm)>; + + +// And now the "MOV" aliases. These also need their own operands because what +// they accept is completely different to what the base instructions accept. +multiclass movalias_operand +{ + def _asmoperand : AsmOperandClass + { + let Name = basename # width # "MovAlias"; + let PredicateMethod + = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; + let RenderMethod + = "addMoveWideMovAliasOperands<" # width # ", " + # "A64Imms::" # immpredicate # ">"; + } + + def _movimm : Operand + { + let ParserMatchClass = !cast(prefix # "_asmoperand"); + + let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); + } +} + +defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>; +defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>; +defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>; +defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>; + +// FIXME: these are officially canonical aliases, but TableGen is too limited to +// print them at the moment. I believe in this case an "AliasPredicate" method +// will need to be implemented. to allow it, as well as the more generally +// useful handling of non-register, non-constant operands. +class movalias + : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>; + +def : movalias; +def : movalias; +def : movalias; +def : movalias; + +//===----------------------------------------------------------------------===// +// PC-relative addressing instructions +//===----------------------------------------------------------------------===// +// Contains: ADR, ADRP + +def adr_label : Operand { + let EncoderMethod = "getLabelOpValue"; + + // This label is a 21-bit offset from PC, unscaled + let PrintMethod = "printLabelOperand<21, 1>"; + let ParserMatchClass = label_asmoperand<21, 1>; + let OperandType = "OPERAND_PCREL"; +} + +def adrp_label_asmoperand : AsmOperandClass +{ + let Name = "AdrpLabel"; + let RenderMethod = "addLabelOperands<21, 4096>"; +} + +def adrp_label : Operand +{ + let EncoderMethod = "getAdrpLabelOpValue"; + + // This label is a 21-bit offset from PC, scaled by the page-size: 4096. + let PrintMethod = "printLabelOperand<21, 4096>"; + let ParserMatchClass = adrp_label_asmoperand; + let OperandType = "OPERAND_PCREL"; +} + +let neverHasSideEffects = 1 in +{ + def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), + "adr\t$Rd, $Label", [], NoItinerary>; + + def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), + "adrp\t$Rd, $Label", [], NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// System instructions +//===----------------------------------------------------------------------===// +// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS +// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL + +// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. 
+def uimm3_asmoperand : AsmOperandClass +{ + let Name = "UImm3"; + let PredicateMethod = "isUImm<3>"; + let RenderMethod = "addImmOperands"; +} + +def uimm3 : Operand +{ + let ParserMatchClass = uimm3_asmoperand; +} + +// The HINT alias can accept a simple unsigned 7-bit immediate. +def uimm7_asmoperand : AsmOperandClass +{ + let Name = "UImm7"; + let PredicateMethod = "isUImm<7>"; + let RenderMethod = "addImmOperands"; +} + +def uimm7 : Operand +{ + let ParserMatchClass = uimm7_asmoperand; +} + +// Multiclass namedimm is defined with the prefetch operands. Most of these fit +// into the NamedImmMapper scheme well: they either accept a named operand or +// any immediate under a particular value (which may be 0, implying no immediate +// is allowed). +defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">; +defm isb : namedimm<"isb", "A64ISB::ISBMapper">; +defm ic : namedimm<"ic", "A64IC::ICMapper">; +defm dc : namedimm<"dc", "A64DC::DCMapper">; +defm at : namedimm<"at", "A64AT::ATMapper">; +defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; + +// However, MRS and MSR are more complicated for a few reasons: +// * There are ~1000 generic names S3____ which have an +// implementation-defined effect +// * Most registers are shared, but some are read-only or write-only. +// * There is a variant of MSR which accepts the same register name (SPSel), but +// which would have a different encoding. + +// In principle these could be resolved in with more complicated subclasses of +// NamedImmMapper, however that imposes an overhead on other "named +// immediates". Both in concrete terms with virtual tables and in unnecessary +// abstraction. + +// The solution adopted here is to take the MRS/MSR Mappers out of the usual +// hierarchy (they're not derived from NamedImmMapper) and to add logic for +// their special situation. +def mrs_asmoperand : AsmOperandClass +{ + let Name = "MRS"; + let ParserMethod = "ParseSysRegOperand"; +} + +def mrs_op : Operand +{ + let ParserMatchClass = mrs_asmoperand; + let PrintMethod = "printMRSOperand"; + let DecoderMethod = "DecodeMRSOperand"; +} + +def msr_asmoperand : AsmOperandClass +{ + let Name = "MSRWithReg"; + + // Note that SPSel is valid for both this and the pstate operands, but with + // different immediate encodings. This is why these operands provide a string + // AArch64Operand rather than an immediate. The overlap is small enough that + // it could be resolved with hackery now, but who can say in future? + let ParserMethod = "ParseSysRegOperand"; +} + +def msr_op : Operand +{ + let ParserMatchClass = msr_asmoperand; + let PrintMethod = "printMSROperand"; + let DecoderMethod = "DecodeMSROperand"; +} + +def pstate_asmoperand : AsmOperandClass +{ + let Name = "MSRPState"; + // See comment above about parser. + let ParserMethod = "ParseSysRegOperand"; +} + +def pstate_op : Operand +{ + let ParserMatchClass = pstate_asmoperand; + let PrintMethod = "printNamedImmOperand"; + let DecoderMethod = "DecodeNamedImmOperand"; +} + +// When is specified, an assembler should accept something like "C4", not +// the usual "#4" immediate. +def CRx_asmoperand : AsmOperandClass +{ + let Name = "CRx"; + let PredicateMethod = "isUImm<4>"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "ParseCRxOperand"; +} + +def CRx : Operand +{ + let ParserMatchClass = CRx_asmoperand; + let PrintMethod = "printCRxOperand"; +} + + +// Finally, we can start defining the instructions. + +// HINT is straightforward, with a few aliases. 
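
A standalone C++ sketch (helper name mine, not from the patch) of the field split the HINTi definition below performs: the 7-bit immediate is simply spread across CRm and Op2, and the familiar aliases are the first few immediate values.

#include <cstdio>

// Mirror HINTi's bit slices: CRm = UImm7{6-3}, Op2 = UImm7{2-0}.
void splitHint(unsigned UImm7, unsigned &CRm, unsigned &Op2) {
  CRm = (UImm7 >> 3) & 0xF;
  Op2 = UImm7 & 0x7;
}

int main() {
  const char *Names[] = {"nop", "yield", "wfe", "wfi", "sev", "sevl"};
  for (unsigned I = 0; I < 6; ++I) {
    unsigned CRm, Op2;
    splitHint(I, CRm, Op2);
    printf("%-5s = hint #%u -> CRm=%u Op2=%u\n", Names[I], I, CRm, Op2);
  }
  return 0;
}
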
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7", + [], NoItinerary> +{ + bits<7> UImm7; + let CRm = UImm7{6-3}; + let Op2 = UImm7{2-0}; + + let Op0 = 0b00; + let Op1 = 0b011; + let CRn = 0b0010; + let Rt = 0b11111; +} + +def : InstAlias<"nop", (HINTi 0)>; +def : InstAlias<"yield", (HINTi 1)>; +def : InstAlias<"wfe", (HINTi 2)>; +def : InstAlias<"wfi", (HINTi 3)>; +def : InstAlias<"sev", (HINTi 4)>; +def : InstAlias<"sevl", (HINTi 5)>; + +// Quite a few instructions then follow a similar pattern of fixing common +// fields in the bitpattern, we'll define a helper-class for them. +class simple_sys op0, bits<3> op1, bits<4> crn, bits<3> op2, + Operand operand, string asmop> + : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"), + [], NoItinerary> +{ + let Op0 = op0; + let Op1 = op1; + let CRn = crn; + let Op2 = op2; + let Rt = 0b11111; +} + + +def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">; +def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">; +def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">; +def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">; + +def : InstAlias<"clrex", (CLREXi 0b1111)>; +def : InstAlias<"isb", (ISBi 0b1111)>; + +// (DMBi 0xb) is a "DMB ISH" instruciton, appropriate for Linux SMP +// configurations at least. +def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>; + +// Any SYS bitpattern can be represented with a complex and opaque "SYS" +// instruction. +def SYSiccix : A64I_system<0b0, (outs), + (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, + uimm3:$Op2, GPR64:$Rt), + "sys\t$Op1, $CRn, $CRm, $Op2, $Rt", + [], NoItinerary> +{ + let Op0 = 0b01; +} + +// You can skip the Xt argument whether it makes sense or not for the generic +// SYS instruction. +def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2", + (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>; + + +// But many have aliases, which obviously don't fit into +class SYSalias + : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> +{ + let isAsmParserOnly = 1; + + bits<14> SysOp; + let Op0 = 0b01; + let Op1 = SysOp{13-11}; + let CRn = SysOp{10-7}; + let CRm = SysOp{6-3}; + let Op2 = SysOp{2-0}; +} + +def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">; + +def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> +{ + let Rt = 0b11111; +} + +def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">; +def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">; + +def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">; + +def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> +{ + let Rt = 0b11111; +} + + +def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt), + (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2), + "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2", + [], NoItinerary> +{ + let Op0 = 0b01; +} + +// The instructions themselves are rather simple for MSR and MRS. 
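
A standalone C++ sketch (struct and helper names mine, not from the patch) of the 16-bit system-register unpacking that the definitions below express as bit slices. If I decode it correctly, the 0xde82 fed to MRSxi by the thread-pointer pattern further down comes apart as Op0=3, Op1=3, CRn=13, CRm=0, Op2=2, i.e. TPIDR_EL0.

#include <cstdio>

struct SysRegFields { unsigned Op0, Op1, CRn, CRm, Op2; };

// Mirror MSRix/MRSxi: SysReg{15-14}, {13-11}, {10-7}, {6-3}, {2-0}.
SysRegFields unpackSysReg(unsigned SysReg) {
  SysRegFields F;
  F.Op0 = (SysReg >> 14) & 0x3;
  F.Op1 = (SysReg >> 11) & 0x7;
  F.CRn = (SysReg >> 7) & 0xF;
  F.CRm = (SysReg >> 3) & 0xF;
  F.Op2 = SysReg & 0x7;
  return F;
}

int main() {
  SysRegFields F = unpackSysReg(0xde82);
  printf("Op0=%u Op1=%u CRn=%u CRm=%u Op2=%u\n",
         F.Op0, F.Op1, F.CRn, F.CRm, F.Op2);
  return 0;
}
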
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), + "msr\t$SysReg, $Rt", [], NoItinerary> +{ + bits<16> SysReg; + let Op0 = SysReg{15-14}; + let Op1 = SysReg{13-11}; + let CRn = SysReg{10-7}; + let CRm = SysReg{6-3}; + let Op2 = SysReg{2-0}; +} + +def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), + "mrs\t$Rt, $SysReg", [], NoItinerary> +{ + bits<16> SysReg; + let Op0 = SysReg{15-14}; + let Op1 = SysReg{13-11}; + let CRn = SysReg{10-7}; + let CRm = SysReg{6-3}; + let Op2 = SysReg{2-0}; +} + +def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), + "msr\t$PState, $CRm", [], NoItinerary> +{ + bits<6> PState; + + let Op0 = 0b00; + let Op1 = PState{5-3}; + let CRn = 0b0100; + let Op2 = PState{2-0}; + let Rt = 0b11111; +} + +//===----------------------------------------------------------------------===// +// Test & branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: TBZ, TBNZ + +// The bit to test is a simple unsigned 6-bit immediate in the X-register +// versions. +def uimm6 : Operand +{ + let ParserMatchClass = uimm6_asmoperand; +} + +def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; + +def tbimm_target : Operand +{ + let EncoderMethod = "getLabelOpValue"; + + // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<14, 4>"; + let ParserMatchClass = label_wid14_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def A64eq : ImmLeaf; +def A64ne : ImmLeaf; + +// These instructions correspond to patterns involving "and" with a power of +// two, which we need to be able to select. +def tstb64_pat : ComplexPattern">; +def tstb32_pat : ComplexPattern">; + +let isBranch = 1, isTerminator = 1 in +{ + def TBZxii : A64I_TBimm<0b0, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary>; + + def TBNZxii : A64I_TBimm<0b1, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary>; + + + // Note, these instructions overlap with the above 64-bit patterns. This is + // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both + // do the same thing and are both permitted assembly. They also both have + // sensible DAG patterns. + def TBZwii : A64I_TBimm<0b0, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary> + { + let Imm{5} = 0b0; + } + + def TBNZwii : A64I_TBimm<0b1, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary> + { + let Imm{5} = 0b0; + } +} + +//===----------------------------------------------------------------------===// +// Unconditional branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: B, BL + +def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; + +def bimm_target : Operand +{ + let EncoderMethod = "getLabelOpValue"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. 
+ let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def blimm_target : Operand +{ + let EncoderMethod = "getLabelOpValue"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +class A64I_BimmImpl patterns, Operand lbl_type> + : A64I_Bimm; + +let isBranch = 1 in { + def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> + { + let isTerminator = 1; + let isBarrier = 1; + } + + def BLimm : A64I_BimmImpl<0b1, "bl", + [(AArch64Call tglobaladdr:$Label)], blimm_target> + { + let isCall = 1; + let Defs = [X30]; + } +} + +def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (register) instructions +//===----------------------------------------------------------------------===// +// Contains: BR, BLR, RET, ERET, DRP. + +// Most of the notional opcode fields in the A64I_Breg format are fixed in A64 +// at the moment. +class A64I_BregImpl opc, + dag outs, dag ins, string asmstr, list patterns, + InstrItinClass itin = NoItinerary> + : A64I_Breg +{ + let isBranch = 1; + let isIndirectBranch = 1; +} + +// Note that these are not marked isCall or isReturn because as far as LLVM is +// concerned they're not. "ret" is just another jump unless it has been selected +// by LLVM as the function's return. + +let isBranch = 1 in { + def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), + "br\t$Rn", [(brind GPR64:$Rn)]> + { + let isBarrier = 1; + let isTerminator = 1; + } + + def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), + "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> + { + let isBarrier = 0; + let isCall = 1; + let Defs = [X30]; + } + + def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), + "ret\t$Rn", []> + { + let isBarrier = 1; + let isTerminator = 1; + let isReturn = 1; + } + + // Create a separate pseudo-instruction for codegen to use so that we don't + // flag x30 as used in every function. It'll be restored before the RET by the + // epilogue if it's legitimately used. + def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> + { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; + } + + def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> + { + let Rn = 0b11111; + let isBarrier = 1; + let isTerminator = 1; + let isReturn = 1; + } + + def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> + { + let Rn = 0b11111; + let isBarrier = 1; + } +} + +def RETAlias : InstAlias<"ret", (RETx X30)>; + + +//===----------------------------------------------------------------------===// +// Address generation patterns +//===----------------------------------------------------------------------===// + +// Primary method of address generation for the small/absolute memory model is +// an ADRP/ADR pair: +// ADRP x0, some_variable +// ADD x0, x0, #:lo12:some_variable +// +// The load/store elision of the ADD is accomplished when selecting +// addressing-modes. This just mops up the cases where that doesn't work and we +// really need an address in some register. + +// This wrapper applies a LO12 modifier to the address. Otherwise we could just +// use the same address. 
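
As a standalone C++ illustration (helper name mine, not from the patch) of the split described above: ADRP materialises the 4096-byte page containing the symbol, and the :lo12: ADD contributes the remaining low twelve bits, so the pair reassembles the full address.

#include <cstdint>
#include <cstdio>

// Split an address the way an ADRP/ADD pair rebuilds it.
void adrpSplit(uint64_t Addr, uint64_t &Page, uint64_t &Lo12) {
  Page = Addr & ~UINT64_C(0xFFF); // "adrp x0, sym" result
  Lo12 = Addr & 0xFFF;            // "add x0, x0, #:lo12:sym" addend
}

int main() {
  uint64_t Page, Lo12;
  adrpSplit(0x400123, Page, Lo12);
  printf("page=0x%llx lo12=0x%llx sum=0x%llx\n",
         (unsigned long long)Page, (unsigned long long)Lo12,
         (unsigned long long)(Page + Lo12)); // page=0x400000 lo12=0x123
  return 0;
}
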
+ +class ADRP_ADD + : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), + (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; + +def : ADRP_ADD; +def : ADRP_ADD; +def : ADRP_ADD; +def : ADRP_ADD; +def : ADRP_ADD; + +//===----------------------------------------------------------------------===// +// GOT access patterns +//===----------------------------------------------------------------------===// + +// FIXME: Wibble + +class GOTLoadSmall + : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), + (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; + +def : GOTLoadSmall; +def : GOTLoadSmall; +def : GOTLoadSmall; + +//===----------------------------------------------------------------------===// +// Tail call handling +//===----------------------------------------------------------------------===// + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in +{ + def TC_RETURNdi + : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), + [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; + + def TC_RETURNxi + : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), + [(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff))]>; +} + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [XSP] in +{ + def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], + (Bimm bimm_target:$Label)>; + + def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], + (BRx GPR64:$Rd)>; +} + + +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), + (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>; + +//===----------------------------------------------------------------------===// +// Thread local storage +//===----------------------------------------------------------------------===// + +// This is a pseudo-instruction representing the ".tlsdesccall" directive in +// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the +// current location. It should always be immediately followed by a BLR +// instruction, and is intended solely for relaxation by the linker. + +def : Pat<(A64threadpointer), (MRSxi 0xde82)>; + +def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> +{ + let hasSideEffects = 1; +} + +def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), + [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> +{ + let isCall = 1; + let Defs = [X30]; +} + +def : Pat<(A64tlsdesc_blr GPR64:$Rn, texternalsym:$Var), + (TLSDESC_BLRx GPR64:$Rn, texternalsym:$Var)>; + +//===----------------------------------------------------------------------===// +// Bitfield patterns +//===----------------------------------------------------------------------===// + +def bfi32_lsb_to_immr : SDNodeXFormgetTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64); +}]>; + +def bfi64_lsb_to_immr : SDNodeXFormgetTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64); +}]>; + +def bfi_width_to_imms : SDNodeXFormgetTargetConstant(N->getZExtValue() - 1, MVT::i64); +}]>; + + +// The simpler patterns deal with cases where no AND mask is actually needed +// (either all bits are used or the low 32 bits are used). 
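
The three SDNodeXForms above are plain arithmetic; a standalone C++ sketch (helper names mine, not from the patch): a bitfield insert of Width bits at bit LSB is encoded with ImmR equal to the negated LSB modulo the register size and ImmS equal to Width minus one.

#include <cstdio>

unsigned bfiImmR(unsigned LSB, unsigned RegSize) {
  return (RegSize - LSB) % RegSize; // bfi32/64_lsb_to_immr
}
unsigned bfiImmS(unsigned Width) {
  return Width - 1;                 // bfi_width_to_imms
}

int main() {
  // "bfi w0, w1, #8, #4": insert 4 bits at bit 8 of a 32-bit register.
  printf("ImmR=%u ImmS=%u\n", bfiImmR(8, 32), bfiImmS(4)); // ImmR=24 ImmS=3
  return 0;
}
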
+let AddedComplexity = 10 in { + +def : Pat<(A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), + (BFIxxii GPR64:$src, GPR64:$Rn, + (bfi64_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + +def : Pat<(A64Bfi GPR32:$src, GPR32:$Rn, imm:$ImmR, imm:$ImmS), + (BFIwwii GPR32:$src, GPR32:$Rn, + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + + +def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), + (i64 4294967295)), + (SUBREG_TO_REG (i64 0), + (BFIwwii (EXTRACT_SUBREG GPR64:$src, sub_32), + (EXTRACT_SUBREG GPR64:$Rn, sub_32), + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS))), + sub_32)>; + +} + +//===----------------------------------------------------------------------===// +// Constant island entries +//===----------------------------------------------------------------------===// + +// The constant island pass needs to create "instructions" in the middle of the +// instruction stream to reresent its constants. + +def cpinst_operand : Operand; + +def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, + cpinst_operand:$cpidx, + i32imm:$size), []> +{ + let neverHasSideEffects = 1; + let isNotDuplicable = 1; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous patterns +//===----------------------------------------------------------------------===// + +// Truncation from 64 to 32-bits just involves renaming your register. +def : Pat<(i32 (trunc (i64 GPR64:$val))), (EXTRACT_SUBREG GPR64:$val, sub_32)>; + +// Similarly, extension where we don't care about the high bits is +// just a rename. +def : Pat<(i64 (anyext (i32 GPR32:$val))), + (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$val, sub_32)>; + +// SELECT instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. +def F128CSEL : PseudoInst<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), + [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn), + FPR128:$Rm))]> +{ + let Uses = [NZCV]; + let usesCustomInserter = 1; +} + +//===----------------------------------------------------------------------===// +// Load/store patterns +//===----------------------------------------------------------------------===// + +// There are lots of patterns here, because we need to allow at least three +// parameters to vary independently. +// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ... +// 2. LLVM source: zextloadi8, anyextloadi8, ... +// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ... +// +// The biggest problem turns out to be the address-generation variable. At the +// point of instantiation we need to produce two DAGs, one for the pattern and +// one for the instruction. Doing this at the lowest level of classes doesn't +// work. +// +// Consider the simple uimm12 addressing mode, and the desire to match both (add +// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the +// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or +// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this +// operation, and PatFrags are for selection not output. +// +// As a result, the address-generation patterns are the final +// instantiations. However, we do still need to vary the operand for the address +// further down (At the point we're deciding A64WrapperSmall, we don't know +// the memory width of the operation). 
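
A standalone C++ sketch (helper name mine, not from the patch) of the unsigned-offset rule everything in this section is built around: the byte offset must divide exactly by the access size, and the scaled result must fit in twelve unsigned bits. The same divisibility requirement is what the min_alignN leaves defined below enforce for :lo12: symbol offsets.

#include <cstdint>
#include <cstdio>

// True, with UImm12 set, if ByteOffset fits the scaled unsigned 12-bit
// immediate of a load/store with the given access size in bytes.
bool encodeUImm12(uint64_t ByteOffset, unsigned MemSize, unsigned &UImm12) {
  if (ByteOffset % MemSize != 0) return false;
  uint64_t Scaled = ByteOffset / MemSize;
  if (Scaled >= 4096) return false;
  UImm12 = (unsigned)Scaled;
  return true;
}

int main() {
  unsigned Imm;
  printf("%d\n", encodeUImm12(32760, 8, Imm)); // 1: 4095 * 8, the LDRx max
  printf("%d\n", encodeUImm12(32768, 8, Imm)); // 0: one step out of range
  return 0;
}
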
+ +//===------------------------------ +// 1. Basic infrastructural defs +//===------------------------------ + +// First, some simple classes for !foreach and !subst to use: +class Decls +{ + dag pattern; +} + +def decls : Decls; +def ALIGN; +def INST; +def OFFSET; +def SHIFT; + +// You can't use !subst on an actual immediate, but you *can* use it on an +// operand record that happens to match a single immediate. So we do. +def imm_eq0 : ImmLeaf; +def imm_eq1 : ImmLeaf; +def imm_eq2 : ImmLeaf; +def imm_eq3 : ImmLeaf; +def imm_eq4 : ImmLeaf; + +// If the low bits of a pointer are known to be 0 then an "or" is just as good +// as addition for computing an offset. This fragment forwards that check for +// TableGen's use. +def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), +[{ + return CurDAG->isBaseWithConstantOffset(SDValue(N, 0)); +}]>; + +// Load/store (unsigned immediate) operations with relocations against global +// symbols (for lo12) are only valid if those symbols have correct alignment +// (since the immediate offset is divided by the access scale, it can't have a +// remainder). +// +// The guaranteed alignment is provided as part of the WrapperSmall +// operation, and checked against one of these. +def any_align : ImmLeaf; +def min_align2 : ImmLeaf= 2; }]>; +def min_align4 : ImmLeaf= 4; }]>; +def min_align8 : ImmLeaf= 8; }]>; +def min_align16 : ImmLeaf= 16; }]>; + +// "Normal" load/store instructions can be used on atomic operations, provided +// the ordering parameter is at most "monotonic". Anything above that needs +// special handling with acquire/release instructions. +class simple_load + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_load_simple_i8 : simple_load; +def atomic_load_simple_i16 : simple_load; +def atomic_load_simple_i32 : simple_load; +def atomic_load_simple_i64 : simple_load; + +class simple_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + return cast(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_store_simple_i8 : simple_store; +def atomic_store_simple_i16 : simple_store; +def atomic_store_simple_i32 : simple_store; +def atomic_store_simple_i64 : simple_store; + +//===------------------------------ +// 2. UImm12 and SImm9 +//===------------------------------ + +// These instructions have two operands providing the address so they can be +// treated similarly for most purposes. + +//===------------------------------ +// 2.1 Base patterns covering extend/truncate semantics +//===------------------------------ + +// Atomic patterns can be shared between integer operations of all sizes, a +// quick multiclass here allows reuse. +multiclass ls_atomic_pats +{ + def : Pat<(!cast("atomic_load_simple_" # sty) address), + (LOAD Base, Offset)>; + + def : Pat<(!cast("atomic_store_simple_" # sty) address, TPR:$Rt), + (STORE TPR:$Rt, Base, Offset)>; +} + +// Instructions accessing a memory chunk smaller than a register (or, in a +// pinch, the same size) have a characteristic set of patterns they want to +// match: extending loads and truncating stores. This class deals with the +// sign-neutral version of those patterns. +// +// It will be instantiated across multiple addressing-modes. 
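
One piece of the infrastructure above deserves a concrete note: add_like_or works because OR and ADD agree exactly when the two operands have no set bit in common, which isBaseWithConstantOffset establishes from the known-zero low bits of the base. A standalone C++ sketch (names mine, not from the patch):

#include <cstdint>
#include <cstdio>

// a | b == a + b precisely when a & b == 0.
bool orActsLikeAdd(uint64_t A, uint64_t B) { return (A & B) == 0; }

int main() {
  uint64_t Base = 0x1000; // 4KB-aligned pointer: low 12 bits are zero
  uint64_t Off = 0x18;
  if (orActsLikeAdd(Base, Off))
    printf("0x%llx == 0x%llx\n", (unsigned long long)(Base | Off),
           (unsigned long long)(Base + Off)); // 0x1018 == 0x1018
  return 0;
}
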
+multiclass ls_small_pats + : ls_atomic_pats +{ + def : Pat<(!cast(zextload # sty) address), (LOAD Base, Offset)>; + + def : Pat<(!cast(extload # sty) address), (LOAD Base, Offset)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(i64 (!cast(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(!cast(truncstore # sty) GPR32:$Rt, address), + (STORE GPR32:$Rt, Base, Offset)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast(truncstore # sty) GPR64:$Rt, address), + (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>; +} + +// Next come patterns for sign-extending loads. +multiclass load_signed_pats +{ + def : Pat<(i32 (!cast("sextload" # sty) address)), + (!cast("LDRS" # T # "w" # U) Base, Offset)>; + + def : Pat<(i64 (!cast("sextload" # sty) address)), + (!cast("LDRS" # T # "x" # U) Base, Offset)>; + +} + +// and finally "natural-width" loads and stores come next. +multiclass ls_neutral_pats +{ + def : Pat<(sty (load address)), (LOAD Base, Offset)>; + def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>; +} + +// Integer operations also get atomic instructions to select for. +multiclass ls_int_neutral_pats + : ls_neutral_pats, + ls_atomic_pats; + +//===------------------------------ +// 2.2. Addressing-mode instantiations +//===------------------------------ + +multiclass uimm12_pats +{ + defm : ls_small_pats; + defm : ls_small_pats; + defm : ls_small_pats; + + defm : ls_int_neutral_pats; + + defm : ls_int_neutral_pats; + + defm : ls_neutral_pats; + + defm : ls_neutral_pats; + + defm : ls_neutral_pats; + + defm : ls_neutral_pats; + + defm : load_signed_pats<"B", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + + defm : load_signed_pats<"H", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern)))), + (LDRSWx Base, !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)))>; +} + +// Straightforward patterns of last resort: a pointer with or without an +// appropriate offset. +defm : uimm12_pats<(i64 GPR64xsp:$Rn), (i64 GPR64xsp:$Rn), (i64 0)>; +defm : uimm12_pats<(add GPR64xsp:$Rn, OFFSET:$UImm12), + (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>; + +// The offset could be hidden behind an "or", of course: +defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12), + (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>; + +// Global addresses under the small-absolute model should use these +// instructions. There are ELF relocations specifically for it. 
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN), + (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>; + +defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, ALIGN), + (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>; + +// External symbols that make it this far should also get standard relocations. +defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, ALIGN), + (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; + + +// We also want to use uimm12 instructions for local variables at the moment. +def tframeindex_XFORM : SDNodeXForm(N)->getIndex(); + return CurDAG->getTargetFrameIndex(FI, MVT::i64); +}]>; + +defm : uimm12_pats<(i64 frameindex:$Rn), + (tframeindex_XFORM tframeindex:$Rn), (i64 0)>; + +// These can be much simpler than uimm12 because we don't to change the operand +// type (e.g. LDURB and LDURH take the same operands). +multiclass simm9_pats +{ + defm : ls_small_pats; + defm : ls_small_pats; + + defm : ls_int_neutral_pats; + defm : ls_int_neutral_pats; + + defm : ls_neutral_pats; + defm : ls_neutral_pats; + defm : ls_neutral_pats; + defm : ls_neutral_pats; + + def : Pat<(i64 (zextloadi32 address)), + (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>; + + def : Pat<(truncstorei32 GPR64:$Rt, address), + (LS32_STUR (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>; + + defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>; + defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>; + def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>; +} + +defm : simm9_pats<(add GPR64xsp:$Rn, simm9:$SImm9), + (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>; + +defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9), + (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>; + + +//===------------------------------ +// 3. Register offset patterns +//===------------------------------ + +// Atomic patterns can be shared between integer operations of all sizes, a +// quick multiclass here allows reuse. +multiclass ro_atomic_pats +{ + def : Pat<(!cast("atomic_load_simple_" # sty) address), + (LOAD Base, Offset, Extend)>; + + def : Pat<(!cast("atomic_store_simple_" # sty) address, TPR:$Rt), + (STORE TPR:$Rt, Base, Offset, Extend)>; +} + +// The register offset instructions take three operands giving the instruction, +// and have an annoying split between instructions where Rm is 32-bit and +// 64-bit. So we need a special hierarchy to describe them. Other than that the +// same operations should be supported as for simm9 and uimm12 addressing. + +multiclass ro_small_pats + : ro_atomic_pats +{ + def : Pat<(!cast(zextload # sty) address), + (LOAD Base, Offset, Extend)>; + + def : Pat<(!cast(extload # sty) address), + (LOAD Base, Offset, Extend)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; + + def : Pat<(i64 (!cast(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; + + def : Pat<(!cast(truncstore # sty) GPR32:$Rt, address), + (STORE GPR32:$Rt, Base, Offset, Extend)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast(truncstore # sty) GPR64:$Rt, address), + (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset, Extend)>; + +} + +// Next come patterns for sign-extending loads. 
+multiclass ro_signed_pats +{ + def : Pat<(i32 (!cast("sextload" # sty) address)), + (!cast("LDRS" # T # "w_" # Rm # "_RegOffset") + Base, Offset, Extend)>; + + def : Pat<(i64 (!cast("sextload" # sty) address)), + (!cast("LDRS" # T # "x_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + +// and finally "natural-width" loads and stores come next. +multiclass ro_neutral_pats +{ + def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; + def : Pat<(store (sty TPR:$Rt), address), + (STORE TPR:$Rt, Base, Offset, Extend)>; +} + +multiclass ro_int_neutral_pats + : ro_neutral_pats, + ro_atomic_pats; + +multiclass regoff_pats +{ + defm : ro_small_pats("LS8_" # Rm # "_RegOffset_LDR"), + !cast("LS8_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + defm : ro_small_pats("LS16_" # Rm # "_RegOffset_LDR"), + !cast("LS16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + defm : ro_small_pats("LS32_" # Rm # "_RegOffset_LDR"), + !cast("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + i32>; + + defm : ro_int_neutral_pats("LS32_" # Rm # "_RegOffset_LDR"), + !cast("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + GPR32, i32>; + + defm : ro_int_neutral_pats("LS64_" # Rm # "_RegOffset_LDR"), + !cast("LS64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + GPR64, i64>; + + defm : ro_neutral_pats("LSFP16_" # Rm # "_RegOffset_LDR"), + !cast("LSFP16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + FPR16, f16>; + + defm : ro_neutral_pats("LSFP32_" # Rm # "_RegOffset_LDR"), + !cast("LSFP32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + FPR32, f32>; + + defm : ro_neutral_pats("LSFP64_" # Rm # "_RegOffset_LDR"), + !cast("LSFP64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + FPR64, f64>; + + defm : ro_neutral_pats("LSFP128_" # Rm # "_RegOffset_LDR"), + !cast("LSFP128_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq4, decls.pattern)), + FPR128, f128>; + + defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + + defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern))), + (!cast("LDRSWx_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + + +// Finally we're in a position to tell LLVM exactly what addresses are reachable +// using register-offset instructions. Essentially a base plus a possibly +// extended, possibly shifted (by access size) offset. 
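
A standalone C++ sketch (helper name mine, not from the patch) of the address computation the regoff_pats instantiations below cover: base plus a 64-bit Xm, or a 32-bit Wm that is first zero- or sign-extended, with the offset optionally shifted left by log2 of the access size.

#include <cstdint>
#include <cstdio>

uint64_t regOffAddr(uint64_t Base, uint64_t Rm, bool Wm, bool Signed,
                    unsigned Shift) {
  uint64_t Off = Wm ? (Rm & 0xFFFFFFFFu) : Rm;
  if (Wm && Signed && (Off & 0x80000000u))
    Off |= 0xFFFFFFFF00000000ull; // SXTW: sign-extend the W register
  return Base + (Off << Shift);   // UXTW/SXTW/LSL scale by access size
}

int main() {
  // "ldr x0, [x1, w2, sxtw #3]" with x1=0x1000, w2=-1: 0x1000 - 8.
  printf("0x%llx\n", (unsigned long long)
         regOffAddr(0x1000, 0xFFFFFFFFu, true, true, 3)); // 0xff8
  return 0;
}
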
+ +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (sext GPR32:$Rm)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 6)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (sext GPR32:$Rm), SHIFT)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 7)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (zext GPR32:$Rm)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 2)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (zext GPR32:$Rm), SHIFT)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 3)>; + +defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, GPR64:$Rm), + (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 2)>; + +defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, (shl GPR64:$Rm, SHIFT)), + (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 3)>; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp new file mode 100644 index 0000000..0603574 --- /dev/null +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -0,0 +1,140 @@ +//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower AArch64 MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "AArch64AsmPrinter.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MCOperand +AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const { + const MCExpr *Expr = 0; + + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext); + + switch (MO.getTargetFlags()) { + case AArch64II::MO_GOT: + Expr = AArch64MCExpr::CreateGOT(Expr, OutContext); + break; + case AArch64II::MO_GOT_LO12: + Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext); + break; + case AArch64II::MO_LO12: + Expr = AArch64MCExpr::CreateLo12(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G1: + Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G0_NC: + Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL: + Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL_LO12: + Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC: + Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC_LO12: + Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G1: + Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G0_NC: + Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_NO_FLAG: + // Expr is already correct + break; + default: + llvm_unreachable("Unexpected MachineOperand flag"); + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), + OutContext), + OutContext); + + return 
MCOperand::CreateExpr(Expr); +} + +bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) const { + switch (MO.getType()) { + default: llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + if (MO.isImplicit()) + return false; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex())); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex())); + break; + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers + return false; + + } + + return true; +} + +void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI, + AArch64AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + if (AP.lowerOperand(MO, MCOp)) + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp new file mode 100644 index 0000000..012a4f8 --- /dev/null +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64MachineFunctionInfo.h" + +using namespace llvm; + +void AArch64MachineFunctionInfo::anchor() { } diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h new file mode 100644 index 0000000..bf5cadf --- /dev/null +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -0,0 +1,158 @@ +//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares AArch64-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef AARCH64MACHINEFUNCTIONINFO_H +#define AARCH64MACHINEFUNCTIONINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// This class is derived from MachineFunctionInfo and contains private AArch64 +/// target-specific information for each MachineFunction. 
+class AArch64MachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + + /// Number of bytes of arguments this function has on the stack. If the callee + /// is expected to restore the argument stack this should be a multiple of 16, + /// all usable during a tail call. + /// + /// The alternative would forbid tail call optimisation in some cases: if we + /// want to transfer control from a function with 8-bytes of stack-argument + /// space to a function with 16-bytes then misalignment of this value would + /// make a stack adjustment necessary, which could not be undone by the + /// callee. + unsigned BytesInStackArgArea; + + /// The number of bytes to restore to deallocate space for incoming + /// arguments. Canonically 0 in the C calling convention, but non-zero when + /// callee is expected to pop the args. + unsigned ArgumentStackToRestore; + + /// If the stack needs to be adjusted on frame entry in two stages, this + /// records the size of the first adjustment just prior to storing + /// callee-saved registers. The callee-saved slots are addressed assuming + /// SP == - InitialStackAdjust. + unsigned InitialStackAdjust; + + /// Number of local-dynamic TLS accesses. + unsigned NumLocalDynamics; + + /// Keep track of the next label to be created within this function to + /// represent a cloned constant pool entry. Used by constant islands pass. + unsigned PICLabelUId; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// general-purpose registers that might contain variadic parameters. + int VariadicGPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the general-purpose registers + /// which might contain variadic arguments. This is the offset from + /// VariadicGPRIdx to what's stored in __gr_top. + unsigned VariadicGPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// floating-point registers that might contain variadic parameters. + int VariadicFPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the floating-point registers + /// which might contain variadic arguments. This is the offset from + /// VariadicFPRIdx to what's stored in __vr_top. + unsigned VariadicFPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of an object pointing just past the last known stacked + /// argument on entry to a variadic function. This goes into the __stack field + /// of the va_list type. + int VariadicStackIdx; + + /// The offset of the frame pointer from the stack pointer on function + /// entry. This is expected to be negative. 
+ int FramePointerOffset; + +public: + AArch64MachineFunctionInfo() + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + PICLabelUId(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + explicit AArch64MachineFunctionInfo(MachineFunction &MF) + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + PICLabelUId(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } + void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} + + unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } + void setArgumentStackToRestore(unsigned bytes) { ArgumentStackToRestore = bytes; } + + unsigned getInitialStackAdjust() const { return InitialStackAdjust; } + void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } + + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + void initPICLabelUId(unsigned UId) { PICLabelUId = UId; } + unsigned getNumPICLabels() const { return PICLabelUId; } + unsigned createPICLabelUId() { return PICLabelUId++; } + + int getVariadicGPRIdx() const { return VariadicGPRIdx; } + void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } + + unsigned getVariadicGPRSize() const { return VariadicGPRSize; } + void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } + + int getVariadicFPRIdx() const { return VariadicFPRIdx; } + void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } + + unsigned getVariadicFPRSize() const { return VariadicFPRSize; } + void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } + + int getVariadicStackIdx() const { return VariadicStackIdx; } + void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } + + int getFramePointerOffset() const { return FramePointerOffset; } + void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } + +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp new file mode 100644 index 0000000..ce66504 --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -0,0 +1,211 @@ +//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + + +#include "AArch64RegisterInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_TARGET_DESC +#include "AArch64GenRegisterInfo.inc" + +using namespace llvm; + +AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii, + const AArch64Subtarget &sti) + : AArch64GenRegisterInfo(AArch64::X30), TII(tii) { +} + +const uint16_t * +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_PCS_SaveList; +} + +const uint32_t* +AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const { + return CSR_PCS_RegMask; +} + +const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const { + return TLSDesc_RegMask; +} + +const TargetRegisterClass * +AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AArch64::FlagClassRegClass) + return &AArch64::GPR64RegClass; + + return RC; +} + + + +BitVector +AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + Reserved.set(AArch64::XSP); + Reserved.set(AArch64::WSP); + + Reserved.set(AArch64::XZR); + Reserved.set(AArch64::WZR); + + if (TFI->hasFP(MF)) { + Reserved.set(AArch64::X29); + Reserved.set(AArch64::W29); + } + + return Reserved; +} + +void +AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, + int SPAdj, RegScavenger *RS) const { + assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet"); + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64FrameLowering *TFI = + static_cast(MF.getTarget().getFrameLowering()); + + unsigned i = 0; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have a FrameIndex Operand"); + } + + // In order to work out the base and offset for addressing, the FrameLowering + // code needs to know (sometimes) whether the instruction is storing/loading a + // callee-saved register, or whether it's a more generic + // operation. Fortunately the frame indices are used *only* for that purpose + // and are contiguous, so we can check here. + const std::vector &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI; + + unsigned FrameReg; + int64_t Offset; + Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj, + IsCalleeSaveOp); + + Offset += MI.getOperand(i+1).getImm(); + + // DBG_VALUE instructions have no real restrictions so they can be handled + // easily. 
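+  // (A DBG_VALUE only describes a variable's location to the debugger and is
+  // never encoded as a real instruction, so no offset-range or scaling
+  // constraints apply; rewriting the operands in place, as below, is enough.)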
+ if (MI.isDebugValue()) { + MI.getOperand(i).ChangeToRegister(FrameReg, /*isDef=*/ false); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + + int MinOffset, MaxOffset, OffsetScale; + if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) { + MinOffset = 0; + MaxOffset = 0xfff; + OffsetScale = 1; + } else { + // Load/store of a stack object + TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); + } + + // The frame lowering has told us a base and offset it thinks we should use to + // access this variable, but it's still up to us to make sure the values are + // legal for the instruction in question. + if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) { + unsigned BaseReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, + BaseReg, FrameReg, BaseReg, Offset); + FrameReg = BaseReg; + Offset = 0; + } + + // Negative offsets are expected if we address from FP, but for + // now this checks nothing has gone horribly wrong. + assert(Offset >= 0 && "Unexpected negative offset from SP"); + + MI.getOperand(i).ChangeToRegister(FrameReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(Offset / OffsetScale); +} + +void +AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + DebugLoc dl = MI->getDebugLoc(); + int Opcode = MI->getOpcode(); + bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; + + if (!TFI->hasReservedCallFrame(MF)) { + unsigned Align = TFI->getStackAlignment(); + + uint64_t Amount = MI->getOperand(0).getImm(); + Amount = (Amount + Align - 1)/Align * Align; + if (!IsDestroy) Amount = -Amount; + + // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it + // doesn't have to pop anything), then the first operand will be zero too so + // this adjustment is a no-op. + if (CalleePopAmount == 0) { + // FIXME: in-function stack adjustment for calls is limited to 12-bits + // because there's no guaranteed temporary register available. Mostly call + // frames will be allocated at the start of a function so this is OK, but + // it is a limitation that needs dealing with. + assert(abs(Amount) < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); + } + } else if (CalleePopAmount != 0) { + // If the calling convention demands that the callee pops arguments from the + // stack, we want to add it back if we have a reserved call frame. 
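+    // (For example, if the callee pops 16 bytes and this function has a
+    // reserved call frame, no adjustment was made anywhere else, so the
+    // emitSPUpdate below compensates with an SP change of -16.)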
+    assert(CalleePopAmount < 0xfff && "call frame too large");
+    emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
+  }
+
+  MBB.erase(MI);
+}
+
+unsigned
+AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (TFI->hasFP(MF))
+    return AArch64::X29;
+  else
+    return AArch64::XSP;
+}
+
+bool
+AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  const AArch64FrameLowering *AFI
+    = static_cast<const AArch64FrameLowering*>(TFI);
+  return AFI->useFPForAddressing(MF);
+}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
new file mode 100644
index 0000000..ea538e2
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -0,0 +1,79 @@
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+  const AArch64InstrInfo &TII;
+
+public:
+  AArch64RegisterInfo(const AArch64InstrInfo &tii,
+                      const AArch64Subtarget &sti);
+
+  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+  const uint32_t *getTLSDescCallPreservedMask() const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+  unsigned getFrameRegister(const MachineFunction &MF) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                           RegScavenger *Rs = NULL) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MI) const;
+
+  /// getCrossCopyRegClass - Returns a legal register class to copy a register
+  /// in the specified class to or from. Returns the original class if it is
+  /// possible to copy between two registers of the specified class.
+  const TargetRegisterClass *
+  getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+  /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+  /// legal to use in the current sub-target and has the same spill size.
+  const TargetRegisterClass *
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+    if (RC == &AArch64::tcGPR64RegClass)
+      return &AArch64::GPR64RegClass;
+
+    return RC;
+  }
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const {
+    return true;
+  }
+
+  bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+    return true;
+  }
+
+  bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 0000000..f1f7fd1
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,205 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Declarations that describe the AArch64 register file
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+class AArch64Reg<bits<16> enc, string n> : Register<n> {
+  let HWEncoding = enc;
+  let Namespace = "AArch64";
+}
+
+class AArch64RegWithSubs<bits<16> enc, string n,
+                         list<Register> subregs = [],
+                         list<SubRegIndex> inds = []>
+  : AArch64Reg<enc, n> {
+  let SubRegs = subregs;
+  let SubRegIndices = inds;
+}
+
+//===----------------------------------------------------------------------===//
+//  Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-30 in {
+  def W#Index : AArch64Reg<Index, "w"#Index>, DwarfRegNum<[Index]>;
+}
+
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">;
+
+// Could be combined with previous loop, but this way leaves w and x registers
+// consecutive as LLVM register numbers, which makes for easier debugging.
+foreach Index = 0-30 in {
+  def X#Index : AArch64RegWithSubs<Index, "x"#Index,
+                                   [!cast<Register>("W"#Index)], [sub_32]>,
+                DwarfRegNum<[Index]>;
+}
+
+def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
+def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+
+// Most instructions treat register 31 as zero for reads and a black-hole for
+// writes.
+
+// Note that the order of registers is important for the Disassembler here:
+// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
+// take an encoding value.
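+// (Concretely, the decoder maps the 5-bit register field straight to an index
+// into the class, so for GPR32 getRegister(0) must give W0 and getRegister(31)
+// must give WZR; hence WZR is listed after W0-W30 below.)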
+def GPR32 : RegisterClass<"AArch64", [i32], 32, + (add (sequence "W%u", 0, 30), WZR)> { +} + +def GPR64 : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 30), XZR)> { +} + +def GPR32nowzr : RegisterClass<"AArch64", [i32], 32, + (sequence "W%u", 0, 30)> { +} + +def GPR64noxzr : RegisterClass<"AArch64", [i64], 64, + (sequence "X%u", 0, 30)> { +} + +// For tail calls, we can't use callee-saved registers or the structure-return +// register, as they are supposed to be live across function calls and may be +// clobbered by the epilogue. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 7), + (sequence "X%u", 9, 18))> { +} + + +// Certain addressing-useful instructions accept sp directly. Again the order of +// registers is important to the Disassembler. +def GPR32wsp : RegisterClass<"AArch64", [i32], 32, + (add (sequence "W%u", 0, 30), WSP)> { +} + +def GPR64xsp : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 30), XSP)> { +} + +// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and +// non-SP variants). We can't use a bare register in those patterns because +// TableGen doesn't like it, so we need a class containing just stack registers +def Rxsp : RegisterClass<"AArch64", [i64], 64, + (add XSP)> { +} + +def Rwsp : RegisterClass<"AArch64", [i32], 32, + (add WSP)> { +} + +//===----------------------------------------------------------------------===// +// Scalar registers in the vector unit: +// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31 +//===----------------------------------------------------------------------===// + +foreach Index = 0-31 in { + def B # Index : AArch64Reg< Index, "b" # Index>, + DwarfRegNum<[!add(Index, 64)]>; + + def H # Index : AArch64RegWithSubs("B" # Index)], [sub_8]>, + DwarfRegNum<[!add(Index, 64)]>; + + def S # Index : AArch64RegWithSubs("H" # Index)], [sub_16]>, + DwarfRegNum<[!add(Index, 64)]>; + + def D # Index : AArch64RegWithSubs("S" # Index)], [sub_32]>, + DwarfRegNum<[!add(Index, 64)]>; + + def Q # Index : AArch64RegWithSubs("D" # Index)], [sub_64]>, + DwarfRegNum<[!add(Index, 64)]>; +} + + +def FPR8 : RegisterClass<"AArch64", [i8], 8, + (sequence "B%u", 0, 31)> { +} + +def FPR16 : RegisterClass<"AArch64", [f16], 16, + (sequence "H%u", 0, 31)> { +} + +def FPR32 : RegisterClass<"AArch64", [f32], 32, + (sequence "S%u", 0, 31)> { +} + +def FPR64 : RegisterClass<"AArch64", [f64], 64, + (sequence "D%u", 0, 31)> { +} + +def FPR128 : RegisterClass<"AArch64", [f128], 128, + (sequence "Q%u", 0, 31)> { +} + + +//===----------------------------------------------------------------------===// +// Vector registers: +//===----------------------------------------------------------------------===// + +// NEON registers simply specify the overall vector, and it's expected that +// Instructions will individually specify the acceptable data layout. In +// principle this leaves two approaches open: +// + An operand, giving a single ADDvvv instruction (for example). This turns +// out to be unworkable in the assembly parser (without every Instruction +// having a "cvt" function, at least) because the constraints can't be +// properly enforced. It also complicates specifying patterns since each +// instruction will accept many types. +// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific +// details about NEON registers, but simplifies most other details. +// +// The second approach was taken. 
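+// (With bare tokens, an instruction such as
+//     add v0.2d, v1.2d, v2.2d
+// is parsed, roughly, as register V0 plus a ".2d" layout token for each
+// operand, and the matcher then checks those tokens against the layouts each
+// instruction accepts.)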
+
+foreach Index = 0-31 in {
+  def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+                                     [!cast<Register>("Q" # Index)],
+                                     [sub_alias]>,
+                  DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers, which should be reasonably
+// sensible for MC and allocation purposes, while allowing them to be treated
+// separately for things like stack spilling.
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+                          (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+                           [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+                           (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv"> {
+  let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+  let CopyCost = -1;
+  let isAllocatable = 0;
+}
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
new file mode 100644
index 0000000..e17cdaa
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -0,0 +1,10 @@
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
new file mode 100644
index 0000000..6bbe075
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+  : TargetSelectionDAGInfo(TM),
+    Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
new file mode 100644
index 0000000..8d3889e
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -0,0 +1,32 @@
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64SELECTIONDAGINFO_H +#define LLVM_AARCH64SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class AArch64TargetMachine; + +class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { + const AArch64Subtarget *Subtarget; +public: + explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM); + ~AArch64SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp new file mode 100644 index 0000000..d17b738 --- /dev/null +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -0,0 +1,43 @@ +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64 specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "AArch64Subtarget.h" +#include "AArch64RegisterInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallVector.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS) + : AArch64GenSubtargetInfo(TT, CPU, FS) + , HasNEON(true) + , HasCrypto(true) + , TargetTriple(TT) { + + ParseSubtargetFeatures(CPU, FS); +} + +bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { + if (RelocM == Reloc::Static) + return false; + + return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility(); +} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h new file mode 100644 index 0000000..2e9205f --- /dev/null +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -0,0 +1,54 @@ +//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H +#define LLVM_TARGET_AARCH64_SUBTARGET_H + +#include "llvm/ADT/Triple.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "AArch64GenSubtargetInfo.inc" + +#include + +namespace llvm { +class StringRef; +class GlobalValue; + +class AArch64Subtarget : public AArch64GenSubtargetInfo { +protected: + bool HasNEON; + bool HasCrypto; + + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; +public: + /// This constructor initializes the data members to match that + /// of the specified triple. + /// + AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. 
Definition of the function is auto-generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+  bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 0000000..68e3643
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,78 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+  RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    Subtarget(TT, CPU, FS),
+    InstrInfo(Subtarget),
+    DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+    TLInfo(*this),
+    TSInfo(*this),
+    FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  AArch64TargetMachine &getAArch64TargetMachine() const {
+    return getTM<AArch64TargetMachine>();
+  }
+
+  const AArch64Subtarget &getAArch64Subtarget() const {
+    return *getAArch64TargetMachine().getSubtargetImpl();
+  }
+
+  virtual bool addInstSelector();
+  virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+  addPass(&UnpackMachineBundlesID);
+  addPass(createAArch64ConstantIslandPass());
+  return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+  addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+  // For ELF, clean up any local-dynamic TLS accesses.
+  if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+    addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+  return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 0000000..c1f47c2
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETMACHINE_H +#define LLVM_AARCH64TARGETMACHINE_H + +#include "AArch64FrameLowering.h" +#include "AArch64ISelLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64SelectionDAGInfo.h" +#include "AArch64Subtarget.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AArch64TargetMachine : public LLVMTargetMachine { + AArch64Subtarget Subtarget; + AArch64InstrInfo InstrInfo; + const DataLayout DL; + AArch64TargetLowering TLInfo; + AArch64SelectionDAGInfo TSInfo; + AArch64FrameLowering FrameLowering; + +public: + AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + const AArch64InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + + const AArch64FrameLowering *getFrameLowering() const { + return &FrameLowering; + } + + const AArch64TargetLowering *getTargetLowering() const { + return &TLInfo; + } + + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + + const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; } + + const DataLayout *getDataLayout() const { return &DL; } + + const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + TargetPassConfig *createPassConfig(PassManagerBase &PM); +}; + +} + +#endif diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp new file mode 100644 index 0000000..3bb961a --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -0,0 +1,19 @@ +//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64TargetObjectFile.h" + +using namespace llvm; + +void +AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h new file mode 100644 index 0000000..07caac1 --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -0,0 +1,27 @@ +//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H +#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + + /// AArch64LinuxTargetObjectFile - This implementation is used for linux + /// AArch64. 
+ class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp new file mode 100644 index 0000000..3402634 --- /dev/null +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -0,0 +1,2025 @@ +//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +namespace { + +class AArch64Operand; + +class AArch64AsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + MCAsmParser &Parser; + +#define GET_ASSEMBLER_HEADER +#include "AArch64GenAsmMatcher.inc" + +public: + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + MCAsmParserExtension::Initialize(_Parser); + + // Initialize the set of available features. 
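+    // (ComputeAvailableFeatures is generated by TableGen from the subtarget
+    // feature definitions: it folds bits such as FeatureNEON into a mask that
+    // the matcher consults before accepting each instruction.)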
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + // These are the public interface of the MCTargetAsmParser + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, + SmallVectorImpl &Operands); + + bool ParseDirective(AsmToken DirectiveID); + bool ParseDirectiveTLSDescCall(SMLoc L); + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl &Operands, + MCStreamer&Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); + + // The rest of the sub-parsers have more freedom over interface: they return + // an OperandMatchResultTy because it's less ambiguous than true/false or + // -1/0/1 even if it is more verbose + OperandMatchResultTy + ParseOperand(SmallVectorImpl &Operands, + StringRef Mnemonic); + + OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal); + + OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind); + + OperandMatchResultTy + ParseNEONLane(SmallVectorImpl &Operands, + uint32_t NumLanes); + + OperandMatchResultTy + ParseRegister(SmallVectorImpl &Operands, + uint32_t &NumLanes); + + OperandMatchResultTy + ParseImmWithLSLOperand(SmallVectorImpl &Operands); + + OperandMatchResultTy + ParseCondCodeOperand(SmallVectorImpl &Operands); + + OperandMatchResultTy + ParseCRxOperand(SmallVectorImpl &Operands); + + OperandMatchResultTy + ParseFPImmOperand(SmallVectorImpl &Operands); + + template OperandMatchResultTy + ParseNamedImmOperand(SmallVectorImpl &Operands) { + return ParseNamedImmOperand(SomeNamedImmMapper(), Operands); + } + + OperandMatchResultTy + ParseNamedImmOperand(const NamedImmMapper &Mapper, + SmallVectorImpl &Operands); + + OperandMatchResultTy + ParseLSXAddressOperand(SmallVectorImpl &Operands); + + OperandMatchResultTy + ParseShiftExtend(SmallVectorImpl &Operands); + + OperandMatchResultTy + ParseSysRegOperand(SmallVectorImpl &Operands); + + bool validateInstruction(MCInst &Inst, + const SmallVectorImpl &Operands); + + /// Scan the next token (which had better be an identifier) and determine + /// whether it represents a general-purpose or vector register. It returns + /// true if an identifier was found and populates its reference arguments. It + /// does not consume the token. + bool + IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec, + SMLoc &LayoutLoc) const; + +}; + +} + +namespace { + +/// Instances of this class represent a parsed AArch64 machine instruction. +class AArch64Operand : public MCParsedAsmOperand { +private: + enum KindTy { + k_ImmWithLSL, // #uimm {, LSL #amt } + k_CondCode, // eq/ne/... + k_FPImmediate, // Limited-precision floating-point imm + k_Immediate, // Including expressions referencing symbols + k_Register, + k_ShiftExtend, + k_SysReg, // The register operand of MRS and MSR instructions + k_Token, // The mnemonic; other raw tokens the auto-generated + k_WrappedRegister // Load/store exclusive permit a wrapped register. 
+ } Kind; + + SMLoc StartLoc, EndLoc; + + union { + struct { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + } ImmWithLSL; + + struct { + A64CC::CondCodes Code; + } CondCode; + + struct { + double Val; + } FPImm; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned RegNum; + } Reg; + + struct { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + } ShiftExtend; + + struct { + const char *Data; + unsigned Length; + } SysReg; + + struct { + const char *Data; + unsigned Length; + } Tok; + }; + + AArch64Operand(KindTy K, SMLoc S, SMLoc E) + : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {} + +public: + AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() { + } + + SMLoc getStartLoc() const { return StartLoc; } + SMLoc getEndLoc() const { return EndLoc; } + void print(raw_ostream&) const; + void dump() const; + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert((Kind == k_Register || Kind == k_WrappedRegister) + && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == k_Immediate && "Invalid access!"); + return Imm.Val; + } + + A64CC::CondCodes getCondCode() const { + assert(Kind == k_CondCode && "Invalid access!"); + return CondCode.Code; + } + + static bool isNonConstantExpr(const MCExpr *E, + AArch64MCExpr::VariantKind &Variant) { + if (const AArch64MCExpr *A64E = dyn_cast(E)) { + Variant = A64E->getKind(); + return true; + } else if (!isa(E)) { + Variant = AArch64MCExpr::VK_AARCH64_None; + return true; + } + + return false; + } + + bool isCondCode() const { return Kind == k_CondCode; } + bool isToken() const { return Kind == k_Token; } + bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isMem() const { return false; } + bool isFPImm() const { return Kind == k_FPImmediate; } + bool isShiftOrExtend() const { return Kind == k_ShiftExtend; } + bool isSysReg() const { return Kind == k_SysReg; } + bool isImmWithLSL() const { return Kind == k_ImmWithLSL; } + bool isWrappedReg() const { return Kind == k_WrappedRegister; } + + bool isAddSubImmLSL0() const { + if (!isImmWithLSL()) return false; + if (ImmWithLSL.ShiftAmount != 0) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC + || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12; + } + + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast(ImmWithLSL.Val); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + + bool isAddSubImmLSL12() const { + if (!isImmWithLSL()) return false; + if (ImmWithLSL.ShiftAmount != 12) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12 + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12; + } + + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast(ImmWithLSL.Val); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + + template bool isAddrRegExtend() const { + if (!isShiftOrExtend()) return false; + + 
A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType; + if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW)) + return false; + + if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX)) + return false; + + return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0; + } + + bool isAdrpLabel() const { + if (!isImm()) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(getImm(), Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_None + || Variant == AArch64MCExpr::VK_AARCH64_GOT + || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL + || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC; + } + + return isLabel<21, 4096>(); + } + + template bool isBitfieldWidth() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + } + + template + bool isCVTFixedPos() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + } + + bool isFMOVImm() const { + if (!isFPImm()) return false; + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + return A64Imms::isFPImm(RealVal, ImmVal); + } + + bool isFPZero() const { + if (!isFPImm()) return false; + + APFloat RealVal(FPImm.Val); + return RealVal.isPosZero(); + } + + template + bool isLabel() const { + if (!isImm()) return false; + + if (dyn_cast(Imm.Val)) { + return true; + } else if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Min = - (scale * (1LL << (field_width - 1))); + int64_t Max = scale * ((1LL << (field_width - 1)) - 1); + return (Val % scale) == 0 && Val >= Min && Val <= Max; + } + + // N.b. this disallows explicit relocation specifications via an + // AArch64MCExpr. Users needing that behaviour + return false; + } + + bool isLane1() const { + if (!isImm()) return false; + + // Because it's come through custom assembly parsing, it must always be a + // constant expression. + return cast(getImm())->getValue() == 1; + } + + bool isLoadLitLabel() const { + if (!isImm()) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(getImm(), Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_None + || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL; + } + + return isLabel<19, 4>(); + } + + template bool isLogicalImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(Imm.Val); + if (!CE) return false; + + uint32_t Bits; + return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + } + + template bool isLogicalImmMOV() const { + if (!isLogicalImm()) return false; + + const MCConstantExpr *CE = cast(Imm.Val); + + // The move alias for ORR is only valid if the immediate cannot be + // represented with a move (immediate) instruction; they take priority. + int UImm16, Shift; + return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift) + && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift); + } + + template + bool isOffsetUImm12() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + + // Assume they know what they're doing for now if they've given us a + // non-constant expression. In principle we could check for ridiculous + // things that can't possibly work or relocations that would almost + // certainly break resulting code. 
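+    // (e.g. "ldr x0, [x1, #:lo12:sym]" is accepted here, but it can only be
+    // resolved at link time if sym's low 12 bits are a multiple of 8.)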
+ if (!CE) + return true; + + int64_t Val = CE->getValue(); + + // Must be a multiple of the access size in bytes. + if ((Val & (MemSize - 1)) != 0) return false; + + // Must be 12-bit unsigned + return Val >= 0 && Val <= 0xfff * MemSize; + } + + template + bool isShift() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != SHKind) + return false; + + return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31; + } + + bool isMOVN32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVN64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G2, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + + bool isMOVZ32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0, + AArch64MCExpr::VK_AARCH64_ABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVZ64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0, + AArch64MCExpr::VK_AARCH64_ABS_G1, + AArch64MCExpr::VK_AARCH64_ABS_G2, + AArch64MCExpr::VK_AARCH64_ABS_G3, + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G2, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + bool isMOVK32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0_NC, + AArch64MCExpr::VK_AARCH64_ABS_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVK64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + 
AArch64MCExpr::VK_AARCH64_ABS_G0_NC, + AArch64MCExpr::VK_AARCH64_ABS_G1_NC, + AArch64MCExpr::VK_AARCH64_ABS_G2_NC, + AArch64MCExpr::VK_AARCH64_ABS_G3, + AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + bool isMoveWideImm(unsigned RegWidth, + AArch64MCExpr::VariantKind *PermittedModifiers, + unsigned NumModifiers) const { + if (!isImmWithLSL()) return false; + + if (ImmWithLSL.ShiftAmount % 16 != 0) return false; + if (ImmWithLSL.ShiftAmount >= RegWidth) return false; + + AArch64MCExpr::VariantKind Modifier; + if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) { + // E.g. "#:abs_g0:sym, lsl #16" makes no sense. + if (!ImmWithLSL.ImplicitAmount) return false; + + for (unsigned i = 0; i < NumModifiers; ++i) + if (PermittedModifiers[i] == Modifier) return true; + + return false; + } + + const MCConstantExpr *CE = dyn_cast(ImmWithLSL.Val); + return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff; + } + + template + bool isMoveWideMovAlias() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + int UImm16, Shift; + uint64_t Value = CE->getValue(); + + // If this is a 32-bit instruction then all bits above 32 should be the + // same: either of these is fine because signed/unsigned values should be + // permitted. + if (RegWidth == 32) { + if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff) + return false; + + Value &= 0xffffffffULL; + } + + return isValidImm(RegWidth, Value, UImm16, Shift); + } + + bool isMSRWithReg() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64SysReg::MSRMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isMSRPState() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64PState::PStateMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isMRS() const { + if (!isSysReg()) return false; + + // First check against specific MSR-only (write-only) registers + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64SysReg::MRSMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isPRFM() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + + if (!CE) + return false; + + return CE->getValue() >= 0 && CE->getValue() <= 31; + } + + template bool isRegExtend() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != SHKind) + return false; + + return ShiftExtend.Amount <= 4; + } + + bool isRegExtendLSL() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != A64SE::LSL) + return false; + + return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; + } + + template bool isSImm7Scaled() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + int64_t Val = CE->getValue(); + if (Val % MemSize != 0) return false; + + Val /= MemSize; + + return Val >= -64 && Val < 64; + } + + template + bool isSImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + return 
CE->getValue() >= -(1LL << (BitWidth - 1)) + && CE->getValue() < (1LL << (BitWidth - 1)); + } + + template + bool isUImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); + } + + bool isUImm() const { + if (!isImm()) return false; + + return isa(getImm()); + } + + static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, + unsigned ShiftAmount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); + Op->ImmWithLSL.Val = Val; + Op->ImmWithLSL.ShiftAmount = ShiftAmount; + Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); + Op->CondCode.Code = Code; + return Op; + } + + static AArch64Operand *CreateFPImm(double Val, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E); + Op->FPImm.Val = Val; + return Op; + } + + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); + Op->Imm.Val = Val; + return Op; + } + + static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Register, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, + unsigned Amount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); + Op->ShiftExtend.ShiftType = ShiftTyp; + Op->ShiftExtend.Amount = Amount; + Op->ShiftExtend.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_Token, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. 
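+ // E.g. a "#42" that has already folded to an MCConstantExpr becomes a
+ // plain immediate operand; something like ":lo12:var" stays an MCExpr
+ // so that a fixup can be emitted for it later.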
+ if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + template + void addBFILSBOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast(getImm()); + unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; + Inst.addOperand(MCOperand::CreateImm(EncodedVal)); + } + + void addBFIWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + + void addBFXWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); + const MCConstantExpr *CE = cast(getImm()); + + Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCondCode())); + } + + void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast(getImm()); + Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); + } + + void addFMOVImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + A64Imms::isFPImm(RealVal, ImmVal); + + Inst.addOperand(MCOperand::CreateImm(ImmVal)); + } + + void addFPZeroOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::CreateImm(0)); + } + + void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Encoded = A64InvertCondCode(getCondCode()); + Inst.addOperand(MCOperand::CreateImm(Encoded)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + template + void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast(getImm()); + uint64_t Val = CE->getValue() / MemSize; + Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); + } + + template + void addSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast(getImm()); + uint64_t Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); + } + + void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { + assert (N == 1 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + } + + template + void addLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = dyn_cast(Imm.Val); + + if (!CE) { + addExpr(Inst, Imm.Val); + return; + } + + int64_t Val = CE->getValue(); + assert(Val % scale == 0 && "Unaligned immediate in instruction"); + Val /= scale; + + Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1))); + } + + template + void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { + assert(N 
== 1 && "Invalid number of operands!"); + + if (const MCConstantExpr *CE = dyn_cast(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); + } else { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + } + + template + void addLogicalImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + const MCConstantExpr *CE = cast(Imm.Val); + + uint32_t Bits; + A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMRSOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRPStateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + + AArch64MCExpr::VariantKind Variant; + if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { + Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); + return; + } + + // We know it's relocated + switch (Variant) { + case AArch64MCExpr::VK_AARCH64_ABS_G0: + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + Inst.addOperand(MCOperand::CreateImm(0)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + Inst.addOperand(MCOperand::CreateImm(1)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + Inst.addOperand(MCOperand::CreateImm(2)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + Inst.addOperand(MCOperand::CreateImm(3)); + break; + default: llvm_unreachable("Inappropriate move wide relocation"); + } + } + + template + void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int UImm16, Shift; + + const MCConstantExpr *CE = cast(getImm()); + uint64_t Value = CE->getValue(); + + if (RegWidth == 32) { + Value &= 0xffffffffULL; + } + + bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); + (void)Valid; + assert(Valid && "Invalid immediates should have been weeded out by now"); + + Inst.addOperand(MCOperand::CreateImm(UImm16)); + 
Inst.addOperand(MCOperand::CreateImm(Shift)); + } + + void addPRFMOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast(getImm()); + assert(CE->getValue() >= 0 && CE->getValue() <= 31 + && "PRFM operand should be 5-bits"); + + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + // For Add-sub (extended register) operands. + void addRegExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } + + // For the extend in load-store (register offset) instructions. + template + void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { + addAddrRegExtendOperands(Inst, N, MemSize); + } + + void addAddrRegExtendOperands(MCInst &Inst, unsigned N, + unsigned MemSize) const { + assert(N == 1 && "Invalid number of operands!"); + + // First bit of Option is set in instruction classes, the high two bits are + // as follows: + unsigned OptionHi = 0; + switch (ShiftExtend.ShiftType) { + case A64SE::UXTW: + case A64SE::LSL: + OptionHi = 1; + break; + case A64SE::SXTW: + case A64SE::SXTX: + OptionHi = 3; + break; + default: + llvm_unreachable("Invalid extend type for register offset"); + } + + unsigned S = 0; + if (MemSize == 1 && !ShiftExtend.ImplicitAmount) + S = 1; + else if (MemSize != 1 && ShiftExtend.Amount != 0) + S = 1; + + Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); + } + void addShiftOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } +}; + +} // end anonymous namespace. + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseOperand(SmallVectorImpl &Operands, + StringRef Mnemonic) { + + // See if the operand has a custom parser + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + + // It could either succeed, fail or just not care. + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + case AsmToken::Identifier: { + // It might be in the LSL/UXTB family ... + OperandMatchResultTy GotShift = ParseShiftExtend(Operands); + + // We can only continue if no tokens were eaten. + if (GotShift != MatchOperand_NoMatch) + return GotShift; + + // ... or it might be a register ... + uint32_t NumLanes = 0; + OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); + assert(GotReg != MatchOperand_ParseFail + && "register parsing shouldn't partially succeed"); + + if (GotReg == MatchOperand_Success) { + if (Parser.getTok().is(AsmToken::LBrac)) + return ParseNEONLane(Operands, NumLanes); + else + return MatchOperand_Success; + } + + // ... or it might be a symbolish thing + } + // Fall through + case AsmToken::LParen: // E.g. (strcmp-4) + case AsmToken::Integer: // 1f, 2b labels + case AsmToken::String: // quoted labels + case AsmToken::Dot: // . 
is Current location + case AsmToken::Dollar: // $ is PC + case AsmToken::Colon: { + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::Hash: { // Immediates + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + Parser.Lex(); + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::LBrac: { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + // There's no comma after a '[', so we can parse the next operand + // immediately. + return ParseOperand(Operands, Mnemonic); + } + // The following will likely be useful later, but not in very early cases + case AsmToken::LCurly: // Weird SIMD lists + llvm_unreachable("Don't know how to deal with '{' in operand"); + return MatchOperand_ParseFail; + } +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { + if (getLexer().is(AsmToken::Colon)) { + AArch64MCExpr::VariantKind RefKind; + + OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind); + if (ResTy != MatchOperand_Success) + return ResTy; + + const MCExpr *SubExprVal; + if (getParser().ParseExpression(SubExprVal)) + return MatchOperand_ParseFail; + + ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); + return MatchOperand_Success; + } + + // No weird AArch64MCExpr prefix + return getParser().ParseExpression(ExprVal) + ? MatchOperand_ParseFail : MatchOperand_Success; +} + +// A lane attached to a NEON register. "[N]", which should yield three tokens: +// '[', N, ']'. A hash is not allowed to precede the immediate here. 
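+// For example, with a ".4s" layout NumLanes is 4, so "[3]" parses while
+// "[4]" is diagnosed as "lane number incompatible with layout".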
+AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNEONLane(SmallVectorImpl &Operands, + uint32_t NumLanes) { + SMLoc Loc = Parser.getTok().getLoc(); + + assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand"); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "expected lane number"); + return MatchOperand_ParseFail; + } + + if (Parser.getTok().getIntVal() >= NumLanes) { + Error(Parser.getTok().getLoc(), "lane number incompatible with layout"); + return MatchOperand_ParseFail; + } + + const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(), + getContext()); + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat actual lane + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(Lane, S, E)); + + + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "expected ']' after lane"); + return MatchOperand_ParseFail; + } + + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); // Eat ']' + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { + assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + + StringRef lowerCase = Parser.getTok().getIdentifier().lower(); + RefKind = StringSwitch(lowerCase) + .Case("got", AArch64MCExpr::VK_AARCH64_GOT) + .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) + .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) + .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) + .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) + .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1) + .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) + .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) + .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) + .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) + .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) + .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) + .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) + .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) + .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) + .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) + .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) + .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12) + .Case("tprel_lo12_nc", 
AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) + .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) + .Default(AArch64MCExpr::VK_AARCH64_None); + + if (RefKind == AArch64MCExpr::VK_AARCH64_None) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat identifier + + if (getLexer().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), + "expected ':' after relocation specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmWithLSLOperand( + SmallVectorImpl &Operands) { + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + const MCExpr *Imm; + if (ParseImmediate(Imm) != MatchOperand_Success) + return MatchOperand_ParseFail; + else if (Parser.getTok().isNot(AsmToken::Comma)) { + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); + return MatchOperand_Success; + } + + // Eat ',' + Parser.Lex(); + + // The optional operand must be "lsl #N" where N is non-negative. + if (Parser.getTok().is(AsmToken::Identifier) + && Parser.getTok().getIdentifier().lower() == "lsl") { + Parser.Lex(); + + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; + } + } + } + + int64_t ShiftAmount = Parser.getTok().getIntVal(); + + if (ShiftAmount < 0) { + Error(Parser.getTok().getLoc(), "positive shift amount required"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the number + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, + false, S, E)); + return MatchOperand_Success; +} + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCondCodeOperand( + SmallVectorImpl &Operands) { + if (Parser.getTok().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + StringRef Tok = Parser.getTok().getIdentifier(); + A64CC::CondCodes CondCode = A64StringToCondCode(Tok); + + if (CondCode == A64CC::Invalid) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat condition code + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCRxOperand( + SmallVectorImpl &Operands) { + SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + StringRef Tok = Parser.getTok().getIdentifier().lower(); + if (Tok[0] != 'c') { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + uint32_t CRNum; + bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); + if (BadNum || CRNum > 15) { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); + + Parser.Lex(); + SMLoc E = 
Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseFPImmOperand( + SmallVectorImpl &Operands) { + + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + bool Negative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + Negative = true; + Parser.Lex(); // Eat '-' + } else if (Parser.getTok().is(AsmToken::Plus)) { + Parser.Lex(); // Eat '+' + } + + if (Parser.getTok().isNot(AsmToken::Real)) { + Error(S, "Expected floating-point immediate"); + return MatchOperand_ParseFail; + } + + APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); + if (Negative) RealVal.changeSign(); + double DblVal = RealVal.convertToDouble(); + + Parser.Lex(); // Eat real number + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); + return MatchOperand_Success; +} + + +// Automatically generated +static unsigned MatchRegisterName(StringRef Name); + +bool +AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, + StringRef &Layout, + SMLoc &LayoutLoc) const { + const AsmToken &Tok = Parser.getTok(); + + if (Tok.isNot(AsmToken::Identifier)) + return false; + + std::string LowerReg = Tok.getString().lower(); + size_t DotPos = LowerReg.find('.'); + + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", AArch64::X29) + .Case("lr", AArch64::X30) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + + SMLoc S = Tok.getLoc(); + RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); + + if (DotPos == StringRef::npos) { + Layout = StringRef(); + } else { + // Everything afterwards needs to be a literal token, expected to be + // '.2d','.b' etc for vector registers. + + // This StringSwitch validates the input and (perhaps more importantly) + // gives us a permanent string to use in the token (a pointer into LowerReg + // would go out of scope when we return). 
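+ //
+ // E.g. for "v0.16b" everything from the dot on (".16b") must match one
+ // of the literal layouts below; a typo like "v0.3s" hits the empty
+ // default and is treated as a malformed register.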
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
+ Layout = LowerReg.substr(DotPos, StringRef::npos);
+ Layout = StringSwitch<const char *>(Layout)
+ .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Default("");
+
+ if (Layout.size() == 0) {
+ // Malformed register
+ return false;
+ }
+ }
+
+ return true;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes) {
+ unsigned RegNum;
+ StringRef Layout;
+ SMLoc RegEndLoc, LayoutLoc;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+
+ if (Layout.size() != 0) {
+ unsigned long long TmpLanes = 0;
+ llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
+ if (TmpLanes != 0) {
+ NumLanes = TmpLanes;
+ } else {
+ // If the number of lanes isn't specified explicitly, a valid instruction
+ // will have an element specifier and be capable of acting on the entire
+ // vector register.
+ switch (Layout.back()) {
+ default: llvm_unreachable("Invalid layout specifier");
+ case 'b': NumLanes = 16; break;
+ case 'h': NumLanes = 8; break;
+ case 's': NumLanes = 4; break;
+ case 'd': NumLanes = 2; break;
+ }
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
+ }
+
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+bool
+AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ // This callback is used for things like DWARF frame directives in
+ // assembly. They don't care about things like NEON layouts or lanes, they
+ // just want to be able to produce the DWARF register number.
+ StringRef LayoutSpec;
+ SMLoc RegEndLoc, LayoutLoc;
+ StartLoc = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
+ return true;
+
+ Parser.Lex();
+ EndLoc = Parser.getTok().getLoc();
+
+ return false;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Since these operands occur in very limited circumstances, without
+ // alternatives, we actually signal an error if there is no match. If relaxing
+ // this, beware of unintended consequences: an immediate will be accepted
+ // during matching, no matter how it gets into the AArch64Operand.
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ if (Tok.is(AsmToken::Identifier)) {
+ bool ValidName;
+ uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+
+ if (!ValidName) {
+ Error(S, "operand specifier not recognised");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // We're done with the identifier.
Eat it + + SMLoc E = Parser.getTok().getLoc(); + const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); + Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); + return MatchOperand_Success; + } else if (Tok.is(AsmToken::Hash)) { + Parser.Lex(); + + const MCExpr *ImmVal; + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + const MCConstantExpr *CE = dyn_cast(ImmVal); + if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { + Error(S, "Invalid immediate for instruction"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); + return MatchOperand_Success; + } + + Error(S, "unexpected operand for instruction"); + return MatchOperand_ParseFail; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseSysRegOperand( + SmallVectorImpl &Operands) { + const AsmToken &Tok = Parser.getTok(); + + // Any MSR/MRS operand will be an identifier, and we want to store it as some + // kind of string: SPSel is valid for two different forms of MSR with two + // different encodings. There's no collision at the moment, but the potential + // is there. + if (!Tok.is(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + + SMLoc S = Tok.getLoc(); + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); + Parser.Lex(); // Eat identifier + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseLSXAddressOperand( + SmallVectorImpl &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + unsigned RegNum; + SMLoc RegEndLoc, LayoutLoc; + StringRef Layout; + if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) + || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) + || Layout.size() != 0) { + // Check Layout.size because we don't want to let "x3.4s" or similar + // through. 
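+ //
+ // (So "ldxr x2, [x3]" gets a wrapped-register operand below, while a
+ // base with a layout suffix such as "x3.4s" bails out here and falls
+ // back to the other operand parsers.)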
+ return MatchOperand_NoMatch; + } + Parser.Lex(); // Eat register + + if (Parser.getTok().is(AsmToken::RBrac)) { + // We're done + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; + } + + // Otherwise, only ", #0" is valid + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat ',' + + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '#' + + if (Parser.getTok().isNot(AsmToken::Integer) + || Parser.getTok().getIntVal() != 0 ) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '0' + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseShiftExtend( + SmallVectorImpl &Operands) { + StringRef IDVal = Parser.getTok().getIdentifier(); + std::string LowerID = IDVal.lower(); + + A64SE::ShiftExtSpecifiers Spec = + StringSwitch(LowerID) + .Case("lsl", A64SE::LSL) + .Case("lsr", A64SE::LSR) + .Case("asr", A64SE::ASR) + .Case("ror", A64SE::ROR) + .Case("uxtb", A64SE::UXTB) + .Case("uxth", A64SE::UXTH) + .Case("uxtw", A64SE::UXTW) + .Case("uxtx", A64SE::UXTX) + .Case("sxtb", A64SE::SXTB) + .Case("sxth", A64SE::SXTH) + .Case("sxtw", A64SE::SXTW) + .Case("sxtx", A64SE::SXTX) + .Default(A64SE::Invalid); + + if (Spec == A64SE::Invalid) + return MatchOperand_NoMatch; + + // Eat the shift + SMLoc S, E; + S = Parser.getTok().getLoc(); + Parser.Lex(); + + if (Spec != A64SE::LSL && Spec != A64SE::LSR && + Spec != A64SE::ASR && Spec != A64SE::ROR) { + // The shift amount can be omitted for the extending versions, but not real + // shifts: + // add x0, x0, x0, uxtb + // is valid, and equivalent to + // add x0, x0, x0, uxtb #0 + + if (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::EndOfStatement) || + Parser.getTok().is(AsmToken::RBrac)) { + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, S, E)); + return MatchOperand_Success; + } + } + + // Eat # at beginning of immediate + if (!Parser.getTok().is(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), + "expected #imm after shift specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + // Make sure we do actually have a number + if (!Parser.getTok().is(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), + "expected integer shift amount"); + return MatchOperand_ParseFail; + } + unsigned Amount = Parser.getTok().getIntVal(); + Parser.Lex(); + E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, S, E)); + + return MatchOperand_Success; +} + +// FIXME: We would really like to be able to tablegen'erate this. +bool AArch64AsmParser:: +validateInstruction(MCInst &Inst, + const SmallVectorImpl &Operands) { + switch (Inst.getOpcode()) { + case AArch64::BFIwwii: + case AArch64::BFIxxii: + case AArch64::SBFIZwwii: + case AArch64::SBFIZxxii: + case AArch64::UBFIZwwii: + case AArch64::UBFIZxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + + if (ImmR == 0) { + // Bitfield inserts are preferred disassembly if ImmS < ImmR. 
However, + // there is this one case where insert is valid syntax but the bfx + // disassembly should be used: e.g. "sbfiz w0, w0, #0, #1". + return false; + } else if (ImmS >= ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested insert overflows register"); + } + return false; + } + case AArch64::BFXILwwii: + case AArch64::BFXILxxii: + case AArch64::SBFXwwii: + case AArch64::SBFXxxii: + case AArch64::UBFXwwii: + case AArch64::UBFXxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + int64_t RegWidth = 0; + switch (Inst.getOpcode()) { + case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii: + RegWidth = 64; + break; + case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii: + RegWidth = 32; + break; + } + + if (ImmS >= RegWidth || ImmS < ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested extract overflows register"); + } + return false; + } + case AArch64::ICix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast(ImmVal); + if (!A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op does not use a register"); + } + return false; + } + case AArch64::ICi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast(ImmVal); + if (A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op requires a register"); + } + return false; + } + case AArch64::TLBIix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast(ImmVal); + if (!A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op does not use a register"); + } + return false; + } + case AArch64::TLBIi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast(ImmVal); + if (A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op requires a register"); + } + return false; + } + } + + return false; +} + + +// Parses the instruction *together with* all operands, appending each parsed +// operand to the "Operands" list +bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl &Operands) { + size_t CondCodePos = Name.find('.'); + + StringRef Mnemonic = Name.substr(0, CondCodePos); + Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); + + if (CondCodePos != StringRef::npos) { + // We have a condition code + SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); + StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos); + A64CC::CondCodes Code; + + Code = A64StringToCondCode(CondStr); + + if (Code == A64CC::Invalid) { + Error(S, "invalid condition code"); + Parser.EatToEndOfStatement(); + return true; + } + + SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); + + Operands.push_back(AArch64Operand::CreateToken(".", DotL)); + SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3); + Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); + } + + // Now we parse the operands of this instruction + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.EatToEndOfStatement(); + return true; + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. 
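+
+ // (Concretely: in "ldr x0, [x1, #4]!" the "]" and "!" are not reached
+ // by this comma loop; they are consumed by the special cases below.)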
+ + // Parse and remember the operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.EatToEndOfStatement(); + return true; + } + + + // After successfully parsing some operands there are two special cases to + // consider (i.e. notional operands not separated by commas). Both are due + // to memory specifiers: + // + An RBrac will end an address for load/store/prefetch + // + An '!' will indicate a pre-indexed operation. + // + // It's someone else's responsibility to make sure these tokens are sane + // in the given context! + if (Parser.getTok().is(AsmToken::RBrac)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); + } + + if (Parser.getTok().is(AsmToken::Exclaim)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("!", Loc)); + Parser.Lex(); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, ""); + } + + // Eat the EndOfStatement + Parser.Lex(); + + return false; +} + +bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".hword") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + else if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".xword") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + else if (IDVal == ".tlsdesccall") + return ParseDirectiveTLSDescCall(DirectiveID.getLoc()); + + return true; +} + +/// parseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. 
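+ // (E.g. ".word 1, 2, 3" makes three trips through this loop, emitting
+ // three 4-byte values; .hword and .xword reuse this path with Size 2
+ // and 8 respectively.)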
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+// parseDirectiveTLSDescCall:
+// ::= .tlsdesccall symbol
+bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+
+ MCInst Inst;
+ Inst.setOpcode(AArch64::TLSDESCCALL);
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+ getParser().getStreamer().EmitInstruction(Inst);
+ return false;
+}
+
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<CondCode: " << CondCode.Code << ">";
+ break;
+ case k_FPImmediate:
+ OS << "<fpimm: " << FPImm.Val << ">";
+ break;
+ case k_ImmWithLSL:
+ OS << "<immwithlsl: imm=" << ImmWithLSL.Val
+ << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << '>';
+ break;
+ case k_Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case k_ShiftExtend:
+ OS << "<shift: type=" << ShiftExtend.ShiftType
+ << ", amount=" << ShiftExtend.Amount << ">";
+ break;
+ case k_SysReg: {
+ StringRef Name(SysReg.Data, SysReg.Length);
+ OS << "<sysreg: " << Name << '>';
+ break;
+ }
+ default:
+ llvm_unreachable("No idea how to print this kind of operand");
+ break;
+ }
+}
+
+void AArch64Operand::dump() const {
+ print(errs());
+}
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..a018a0a
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) + +add_llvm_library(LLVMAArch64AsmParser + AArch64AsmParser.cpp + ) + +add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen) diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..bd1fcaf --- /dev/null +++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64AsmParser +parent = AArch64 +required_libraries = AArch64Desc AArch64Info MC MCParser Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile new file mode 100644 index 0000000..56c9ef5 --- /dev/null +++ b/lib/Target/AArch64/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64AsmParser + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
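+#
+# (Both parent directories are needed: the parser pulls in private headers
+# like AArch64.h from the source tree and the tablegen'd
+# AArch64GenAsmMatcher.inc from the build tree.)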
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt new file mode 100644 index 0000000..a89861f --- /dev/null +++ b/lib/Target/AArch64/CMakeLists.txt @@ -0,0 +1,35 @@ +set(LLVM_TARGET_DEFINITIONS AArch64.td) + +tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) +add_public_tablegen_target(AArch64CommonTableGen) + +add_llvm_target(AArch64CodeGen + AArch64AsmPrinter.cpp + AArch64ConstantIslandPass.cpp + AArch64FrameLowering.cpp + AArch64ISelDAGToDAG.cpp + AArch64ISelLowering.cpp + AArch64InstrInfo.cpp + AArch64MachineFunctionInfo.cpp + AArch64MCInstLower.cpp + AArch64RegisterInfo.cpp + AArch64SelectionDAGInfo.cpp + AArch64Subtarget.cpp + AArch64TargetMachine.cpp + AArch64TargetObjectFile.cpp + ) + +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(TargetInfo) diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp new file mode 100644 index 0000000..e98285b --- /dev/null +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -0,0 +1,791 @@ +//===- AArch64Disassembler.cpp - Disassembler for AArch64/Thumb ISA -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-disassembler" + +#include "AArch64.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +/// AArch64 disassembler for all AArch64 platforms. +class AArch64Disassembler : public MCDisassembler { + const MCRegisterInfo *RegInfo; +public: + /// Initializes the disassembler. + /// + AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) + : MCDisassembler(STI), RegInfo(Info) { + } + + ~AArch64Disassembler() { + } + + /// See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + const MCRegisterInfo *getRegInfo() const { return RegInfo; } +}; + +} + +// Forward-declarations used in the auto-generated files. 
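+//
+// The tablegen'erated decodeInstruction() in
+// AArch64GenDisassemblerTables.inc (#included below) refers to these
+// decoders by name, so they must all be declared before that point.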
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, + llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static bool 
Check(DecodeStatus &Out, DecodeStatus In); + +#include "AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" + +static bool Check(DecodeStatus &Out, DecodeStatus In) { + switch (In) { + case MCDisassembler::Success: + // Out stays the same. + return true; + case MCDisassembler::SoftFail: + Out = In; + return true; + case MCDisassembler::Fail: + Out = In; + return false; + } + llvm_unreachable("Invalid DecodeStatus!"); +} + +DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // Encoded as a small-endian 32-bit word in the stream. + uint32_t insn = (bytes[3] << 24) | + (bytes[2] << 16) | + (bytes[1] << 8) | + (bytes[0] << 0); + + // Calling the auto-generated decoder function. + DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + Size = 0; + return MCDisassembler::Fail; +} + +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const AArch64Disassembler *Dis = static_cast(D); + return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo); +} + +static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus 
+DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder) { + // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, + // S}. Hence we want to check bit 1. + if (!(OptionHiS & 2)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(OptionHiS)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be + // between 0 and 31. + if (Imm6Bits > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. 
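+  // A worked example of that encoding: an encoded field of 40 denotes a
+  // conversion immediate of 64 - 40 = 24, while any encoded value below 32
+  // would denote an immediate greater than 32 and so must be rejected.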
+ if (Imm6Bits < 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + + +template +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder) { + unsigned Imm16 = FullImm & 0xffff; + unsigned Shift = FullImm >> 16; + + if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder) { + uint64_t Imm; + if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Bits)); + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values 0-4 are valid for this 3-bit field + if (ShiftAmount > 4) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values below 32 are valid for a 32-bit register + if (ShiftAmount > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned ImmS = fieldFromInstruction(Insn, 10, 6); + unsigned ImmR = fieldFromInstruction(Insn, 16, 6); + unsigned SF = fieldFromInstruction(Insn, 31, 1); + + // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise + // out assertions that it thinks should never be hit. + enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc; + Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2); + + if (!SF) { + // ImmR and ImmS must be between 0 and 31 for 32-bit instructions. + if (ImmR > 31 || ImmS > 31) + return MCDisassembler::Fail; + } + + if (SF) { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder); + } + + // ASR and LSR have more specific patterns so they won't get here: + assert(!(ImmS == 31 && !SF && Opc != BFM) && "shift should have used auto decode"); + assert(!(ImmS == 63 && SF && Opc != BFM) && "shift should have used auto decode"); + + // Extension instructions similarly: + if (Opc == SBFM && ImmR == 0) { + assert((ImmS != 7 && ImmS != 15) && "extension got here"); + assert((ImmS != 31 || SF == 0) && "extension got here"); + } else if (Opc == UBFM && ImmR == 0) { + assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here"); + } + + if (Opc == UBFM) { + // It might be a LSL instruction, which actually takes the shift amount + // itself as an MCInst operand. 
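+    // Architecturally, "lsl #sh" on a W-bit register is an alias of
+    // "ubfm #((W - sh) % W), #(W - 1 - sh)", so ImmR == (ImmS + 1) % W
+    // identifies an LSL and the shift amount is recovered as W - 1 - ImmS.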
+ if (SF && (ImmS + 1) % 64 == ImmR) { + Inst.setOpcode(AArch64::LSLxxi); + Inst.addOperand(MCOperand::CreateImm(63 - ImmS)); + return MCDisassembler::Success; + } else if (!SF && (ImmS + 1) % 32 == ImmR) { + Inst.setOpcode(AArch64::LSLwwi); + Inst.addOperand(MCOperand::CreateImm(31 - ImmS)); + return MCDisassembler::Success; + } + } + + // Otherwise it's definitely either an extract or an insert depending on which + // of ImmR or ImmS is larger. + unsigned ExtractOp, InsertOp; + switch (Opc) { + default: llvm_unreachable("unexpected instruction trying to decode bitfield"); + case SBFM: + ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii; + InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii; + break; + case BFM: + ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii; + InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii; + break; + case UBFM: + ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii; + InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii; + break; + } + + // Otherwise it's a boring insert or extract + Inst.addOperand(MCOperand::CreateImm(ImmR)); + Inst.addOperand(MCOperand::CreateImm(ImmS)); + + + if (ImmS < ImmR) + Inst.setOpcode(InsertOp); + else + Inst.setOpcode(ExtractOp); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + // This decoder exists to add the dummy Lane operand to the MCInst, which must + // be 1 in assembly but has no other real manifestation. + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); + + if (IsToVec) { + DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + } + + // Add the lane + Inst.addOperand(MCOperand::CreateImm(1)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeStatus Result = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); + unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); + unsigned L = fieldFromInstruction(Insn, 22, 1); + unsigned V = fieldFromInstruction(Insn, 26, 1); + unsigned Opc = fieldFromInstruction(Insn, 30, 2); + + // Not an official name, but it turns out that bit 23 distinguishes indexed + // from non-indexed operations. + unsigned Indexed = fieldFromInstruction(Insn, 23, 1); + + if (Indexed && L == 0) { + // The MCInst for an indexed store has an out operand and 4 ins: + // Rn_wb, Rt, Rt2, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + // You shouldn't load to the same register twice in an instruction... + if (L && Rt == Rt2) + Result = MCDisassembler::SoftFail; + + // ... or do any operation that writes-back to a transfer register. But note + // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. + if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) + Result = MCDisassembler::SoftFail; + + // Exactly how we decode the MCInst's registers depends on the Opc and V + // fields of the instruction. 
These also obviously determine the size of the + // operation so we can fill in that information while we're at it. + if (V) { + // The instruction operates on the FP/SIMD registers + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } else { + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + assert(L && "unexpected \"store signed\" attempt"); + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } + + if (Indexed && L == 1) { + // The MCInst for an indexed load has 3 out operands and an 3 ins: + // Rt, Rt2, Rn_wb, Rt2, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(SImm7)); + + return Result; +} + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + uint32_t Val, + uint64_t Address, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(Val, 0, 5); + unsigned Rn = fieldFromInstruction(Val, 5, 5); + unsigned Rt2 = fieldFromInstruction(Val, 10, 5); + unsigned MemSize = fieldFromInstruction(Val, 30, 2); + + DecodeStatus S = MCDisassembler::Success; + if (Rt == Rt2) S = MCDisassembler::SoftFail; + + switch (MemSize) { + case 2: + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + case 3: + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); + } + + if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +template +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + SomeNamedImmMapper Mapper; + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + if (ValidNamed || Mapper.validImm(Val)) { + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + + Inst.addOperand(MCOperand::CreateImm(Val)); + + return ValidNamed ? 
MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+                                     unsigned Val,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+                             Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+                                     unsigned Val,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+                             Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+                                                   unsigned Insn,
+                                                   uint64_t Address,
+                                                   const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+  unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+  unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+  unsigned V = fieldFromInstruction(Insn, 26, 1);
+  unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+  if (Opc == 0 || (V == 1 && Opc == 2)) {
+    // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  }
+
+  if (V == 0 && (Opc == 2 || Size == 3)) {
+    DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+  } else if (V == 0) {
+    DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+  } else if (V == 1 && (Opc & 2)) {
+    DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+  } else {
+    switch (Size) {
+    case 0:
+      DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 1:
+      DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 2:
+      DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 3:
+      DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    }
+  }
+
+  if (Opc != 0 && (V != 1 || Opc != 2)) {
+    // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  }
+
+  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+  // N.b. The official documentation says unpredictable if Rt == Rn, but this
+  // takes place at the architectural rather than encoding level:
+  //
+  // "STR xzr, [sp], #4" is perfectly valid.
+  if (V == 0 && Rt == Rn && Rn != 31)
+    return MCDisassembler::SoftFail;
+  else
+    return MCDisassembler::Success;
+}
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+                                                 const MCSubtargetInfo &STI) {
+  return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+  TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+                                         createAArch64Disassembler);
+}
+
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..d4bd163
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+  AArch64Disassembler.cpp
+  )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000..123eb3e
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Disassembler +parent = AArch64 +required_libraries = AArch64CodeGen AArch64Desc AArch64Info MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile new file mode 100644 index 0000000..5c86120 --- /dev/null +++ b/lib/Target/AArch64/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Disassembler + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp new file mode 100644 index 0000000..909810f --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -0,0 +1,408 @@ +//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter.inc" + +static int64_t unpackSignedImm(int BitWidth, uint64_t Value) { + assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit"); + if (Value & (1ULL << (BitWidth - 1))) + return static_cast(Value) - (1LL << BitWidth); + else + return Value; +} + +AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. 
+ setAvailableFeatures(STI.getFeatureBits()); +} + +void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void +AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(9, MOImm.getImm()); + + O << '#' << Imm; +} + +void +AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize) { + unsigned ExtImm = MI->getOperand(OpNum).getImm(); + unsigned OptionHi = ExtImm >> 1; + unsigned S = ExtImm & 1; + bool IsLSL = OptionHi == 1 && RmSize == 64; + + const char *Ext; + switch (OptionHi) { + case 1: + Ext = (RmSize == 32) ? "uxtw" : "lsl"; + break; + case 3: + Ext = (RmSize == 32) ? "sxtw" : "sxtx"; + break; + default: + llvm_unreachable("Incorrect Option on load/store (reg offset)"); + } + O << Ext; + + if (S) { + unsigned ShiftAmt = Log2_32(MemSize); + O << " #" << ShiftAmt; + } else if (IsLSL) { + O << " #0"; + } +} + +void +AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &Imm12Op = MI->getOperand(OpNum); + + if (Imm12Op.isImm()) { + int64_t Imm12 = Imm12Op.getImm(); + assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); + O << "#" << Imm12; + } else { + assert(Imm12Op.isExpr() && "Unexpected shift operand type"); + O << "#" << *Imm12Op.getExpr(); + } +} + +void +AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + + printAddSubImmLSL0Operand(MI, OpNum, O); + + O << ", lsl #12"; +} + +void +AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + O << MO.getImm(); +} + +template void +AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmROp = MI->getOperand(OpNum); + unsigned LSB = ImmROp.getImm() == 0 ? 
0 : RegWidth - ImmROp.getImm(); + + O << '#' << LSB; +} + +void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + unsigned Width = ImmSOp.getImm() + 1; + + O << '#' << Width; +} + +void +AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + const MCOperand &ImmROp = MI->getOperand(OpNum - 1); + + unsigned ImmR = ImmROp.getImm(); + unsigned ImmS = ImmSOp.getImm(); + + assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); + + O << '#' << (ImmS - ImmR + 1); +} + +void +AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &CRx = MI->getOperand(OpNum); + + O << 'c' << CRx.getImm(); +} + + +void +AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ScaleOp = MI->getOperand(OpNum); + + O << '#' << (64 - ScaleOp.getImm()); +} + + +void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + const MCOperand &MOImm8 = MI->getOperand(OpNum); + + assert(MOImm8.isImm() + && "Immediate operand required for floating-point immediate inst"); + + uint32_t Imm8 = MOImm8.getImm(); + uint32_t Fraction = Imm8 & 0xf; + uint32_t Exponent = (Imm8 >> 4) & 0x7; + uint32_t Negative = (Imm8 >> 7) & 0x1; + + float Val = 1.0f + Fraction / 16.0f; + + // That is: + // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, + // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 + if (Exponent & 0x4) { + Val /= 1 << (7 - Exponent); + } else { + Val *= 1 << (Exponent + 1); + } + + Val = Negative ? -Val : Val; + + o << '#' << format("%.8f", Val); +} + +void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + o << "#0.0"; +} + +void +AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + O << A64CondCodeToString(static_cast(MO.getImm())); +} + +template void +AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (!MO.isImm()) { + printOperand(MI, OpNum, O); + return; + } + + // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which + // is multiplied by 4 (because all A64 instructions are 32-bits wide). 
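+  // For example, with field_width == 19 and scale == 4 (the LDR (lit) case
+  // described above), a raw field of 0x7ffff sign-extends to -1 and is
+  // printed as #-4.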
+ uint64_t UImm = MO.getImm(); + uint64_t Sign = UImm & (1LL << (field_width - 1)); + int64_t SImm = scale * ((UImm & ~Sign) - Sign); + + O << "#" << SImm; +} + +template void +AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + uint64_t Val; + A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val); + O << "#0x"; + O.write_hex(Val); +} + +void +AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, int MemSize) { + const MCOperand &MOImm = MI->getOperand(OpNum); + + if (MOImm.isImm()) { + uint32_t Imm = MOImm.getImm() * MemSize; + + O << "#" << Imm; + } else { + O << "#" << *MOImm.getExpr(); + } +} + +void +AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Shift) { + const MCOperand &MO = MI->getOperand(OpNum); + + // LSL #0 is not printed + if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0) + return; + + switch (Shift) { + case A64SE::LSL: O << "lsl"; break; + case A64SE::LSR: O << "lsr"; break; + case A64SE::ASR: O << "asr"; break; + case A64SE::ROR: O << "ror"; break; + default: llvm_unreachable("Invalid shift specifier in logical instruction"); + } + + O << " #" << MO.getImm(); +} + +void +AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &UImm16MO = MI->getOperand(OpNum); + const MCOperand &ShiftMO = MI->getOperand(OpNum + 1); + + if (UImm16MO.isImm()) { + O << '#' << UImm16MO.getImm(); + + if (ShiftMO.getImm() != 0) + O << ", lsl #" << (ShiftMO.getImm() * 16); + + return; + } + + O << "#" << *UImm16MO.getExpr(); +} + +void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + bool ValidName; + const MCOperand &MO = MI->getOperand(OpNum); + StringRef Name = Mapper.toString(MO.getImm(), ValidName); + + if (ValidName) + O << Name; + else + O << '#' << MO.getImm(); +} + +void +AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + bool ValidName; + std::string Name = Mapper.toString(MO.getImm(), ValidName); + if (ValidName) { + O << Name; + return; + } +} + + +void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Ext) { + // FIXME: In principle TableGen should be able to detect this itself far more + // easily. We will only accumulate more of these hacks. 
+ unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + + if (isStackReg(Reg0) || isStackReg(Reg1)) { + A64SE::ShiftExtSpecifiers LSLEquiv; + + if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) + LSLEquiv = A64SE::UXTX; + else + LSLEquiv = A64SE::UXTW; + + if (Ext == LSLEquiv) { + O << "lsl #" << MI->getOperand(OpNum).getImm(); + return; + } + } + + switch (Ext) { + case A64SE::UXTB: O << "uxtb"; break; + case A64SE::UXTH: O << "uxth"; break; + case A64SE::UXTW: O << "uxtw"; break; + case A64SE::UXTX: O << "uxtx"; break; + case A64SE::SXTB: O << "sxtb"; break; + case A64SE::SXTH: O << "sxth"; break; + case A64SE::SXTW: O << "sxtw"; break; + case A64SE::SXTX: O << "sxtx"; break; + default: llvm_unreachable("Unexpected shift type for printing"); + } + + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getImm() != 0) + O << " #" << MO.getImm(); +} + +template void +AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(7, MOImm.getImm()); + + O << "#" << (Imm * MemScale); +} + +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. + const MCConstantExpr *BranchTarget = dyn_cast(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. + O << *Op.getExpr(); + } + } +} + + +void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + if (MI->getOpcode() == AArch64::TLSDESCCALL) { + // This is a special assembler directive which applies an + // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed + // form outside the normal TableGenerated scheme. + O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); + } else if (!printAliasInstr(MI, O)) + printInstruction(MI, O); + + printAnnotation(O, Annot); +} diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h new file mode 100644 index 0000000..1890082 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -0,0 +1,171 @@ +//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64INSTPRINTER_H +#define LLVM_AARCH64INSTPRINTER_H + +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { + +class MCOperand; + +class AArch64InstPrinter : public MCInstPrinter { +public: + AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + + // Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); + + void printRegName(raw_ostream &O, unsigned RegNum) const; + + template + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); + } + + + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize); + + void printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + void printAddSubImmLSL12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + + void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template + void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + + void printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + template + void printOffsetUImm12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &o) { + printOffsetUImm12Operand(MI, OpNum, o, MemScale); + } + + void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &o, int MemScale); + + template + void printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template + void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template + void printNamedImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); + } + + void printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printMRSOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O); + } + + void printMSROperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O); + } + + void printShiftOperand(const char *name, const MCInst *MI, + unsigned OpIdx, raw_ostream &O); + + void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("lsr", MI, OpNum, O); + } + void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("asr", MI, 
OpNum, O); + } + void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("ror", MI, OpNum, O); + } + + template + void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand(MI, OpNum, O, Shift); + } + + void printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Sh); + + + void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template void + printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printRegExtendOperand(MI, OpNum, O, EXT); + } + + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + + bool isStackReg(unsigned RegNo) { + return RegNo == AArch64::XSP || RegNo == AArch64::WSP; + } + + +}; + +} + +#endif diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..d4b980a --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64AsmPrinter + AArch64InstPrinter.cpp + ) + +add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen) + diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..40fdc55 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64AsmPrinter +parent = AArch64 +required_libraries = MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile new file mode 100644 index 0000000..1c36a8d --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/AsmPrinter/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64AsmPrinter + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt new file mode 100644 index 0000000..09c7448 --- /dev/null +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -0,0 +1,36 @@ +;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = AArch64 +parent = Target +has_asmparser = 1 +has_asmprinter = 1 +has_disassembler = 1 +;has_jit = 1 + +[component_1] +type = Library +name = AArch64CodeGen +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp new file mode 100644 index 0000000..1c09369 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -0,0 +1,580 @@ +//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class AArch64AsmBackend : public MCAsmBackend { + const MCSubtargetInfo* STI; +public: + AArch64AsmBackend(const Target &T, const StringRef TT) + : MCAsmBackend(), + STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) + {} + + + ~AArch64AsmBackend() { + delete STI; + } + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; + + virtual void processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved); +}; +} // end anonymous namespace + +void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, + const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + // The ADRP instruction adds some multiple of 0x1000 to the current PC & + // ~0xfff. This means that the required offset to reach a symbol can vary by + // up to one step depending on where the ADRP is in memory. For example: + // + // ADRP x0, there + // there: + // + // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and + // we'll need that as an offset. 
At any other address "there" will be in the + // same page as the ADRP and the instruction should encode 0x0. Assuming the + // section isn't 0x1000-aligned, we therefore need to delegate this decision + // to the linker -- a relocation! + if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) + IsResolved = false; +} + + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); + +namespace { + +class ELFAArch64AsmBackend : public AArch64AsmBackend { +public: + uint8_t OSABI; + ELFAArch64AsmBackend(const Target &T, const StringRef TT, + uint8_t _OSABI) + : AArch64AsmBackend(T, TT), OSABI(_OSABI) { } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const; + + unsigned int getNumFixupKinds() const { + return AArch64::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { +// This table *must* be in the order that the fixup_* kinds are defined in +// AArch64FixupKinds.h. +// +// Name Offset (bits) Size (bits) Flags + { "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_add_lo12", 0, 32, 0 }, + { "fixup_a64_ldst8_lo12", 0, 32, 0 }, + { "fixup_a64_ldst16_lo12", 0, 32, 0 }, + { "fixup_a64_ldst32_lo12", 0, 32, 0 }, + { "fixup_a64_ldst64_lo12", 0, 32, 0 }, + { "fixup_a64_ldst128_lo12", 0, 32, 0 }, + { "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_movw_uabs_g0", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g1", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g2", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g3", 0, 32, 0 }, + { "fixup_a64_movw_sabs_g0", 0, 32, 0 }, + { "fixup_a64_movw_sabs_g1", 0, 32, 0 }, + { "fixup_a64_movw_sabs_g2", 0, 32, 0 }, + { "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, + { "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, + { "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, + { "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, + { "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { 
"fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_movw_tprel_g2", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g1", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g0", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, + { "fixup_a64_add_tprel_hi12", 0, 32, 0 }, + { "fixup_a64_add_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, + { "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, + { "fixup_a64_tlsdesc_call", 0, 0, 0 } + }; + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8; + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the bits + // from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) { + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + } + } + + bool mayNeedRelaxation(const MCInst&) const { + return false; + } + + void relaxInstruction(const MCInst&, llvm::MCInst&) const { + llvm_unreachable("Cannot relax instructions"); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createAArch64ELFObjectWriter(OS, OSABI); + } +}; + +} // end anonymous namespace + +bool +ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // Correct for now. With all instructions 32-bit only very low-level + // considerations could make you select something which may fail. 
+ return false; +} + + +bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + // Can't emit NOP with size not multiple of 32-bits + if (Count % 4 != 0) + return false; + + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0xd503201f); + + return true; +} + +static unsigned ADRImmBits(unsigned Value) { + unsigned lo2 = Value & 0x3; + unsigned hi19 = (Value & 0x1fffff) >> 2; + + return (hi19 << 5) | (lo2 << 29); +} + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_2: + assert((int64_t)Value >= -32768 && + (int64_t)Value <= 65536 && + "Out of range ABS16 fixup"); + return Value; + case FK_Data_4: + assert((int64_t)Value >= -(1LL << 31) && + (int64_t)Value <= (1LL << 32) - 1 && + "Out of range ABS32 fixup"); + return Value; + case FK_Data_8: + return Value; + + case AArch64::fixup_a64_ld_gottprel_prel19: + // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F + // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20. + case AArch64::fixup_a64_ld_prel: + // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits + // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup"); + return (Value & 0x1ffffc) << 3; + + case AArch64::fixup_a64_adr_prel: + // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of + // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range ADR fixup"); + return ADRImmBits(Value & 0x1fffff); + + case AArch64::fixup_a64_adr_prel_page: + // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF + // F000 of the result of the operation, checking that -2^32 <= result < + // 2^32. + assert((int64_t)Value >= -(1LL << 32) && + (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); + return ADRImmBits((Value & 0x1fffff000ULL) >> 12); + + case AArch64::fixup_a64_add_dtprel_hi12: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits + // FF F000 of DTPREL(S+A), check 0 <= X < 2^24. + case AArch64::fixup_a64_add_tprel_hi12: + // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits + // FF F000 of TPREL(S+A), check 0 <= X < 2^24. + assert((int64_t)Value >= 0 && + (int64_t)Value < (1LL << 24) && "Out of range ADD fixup"); + return (Value & 0xfff000) >> 2; + + case AArch64::fixup_a64_add_dtprel_lo12: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits + // FFF of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_add_tprel_lo12: + // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits + // FFF of TPREL(S+A), check 0 <= X < 2^12. + assert((int64_t)Value >= 0 && + (int64_t)Value < (1LL << 12) && "Out of range ADD fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_add_dtprel_lo12_nc: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits + // FFF of DTPREL(S+A) with no overflow check. + case AArch64::fixup_a64_add_tprel_lo12_nc: + // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits + // FFF of TPREL(S+A) with no overflow check. + case AArch64::fixup_a64_tlsdesc_add_lo12_nc: + // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits + // FFF of G(TLSDESC(S+A)), with no overflow check. 
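+  // Each of the cases above falls through to the shared return below, which
+  // places the low 12 bits of the value in the ADD immediate field
+  // (instruction bits 21:10).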
+ case AArch64::fixup_a64_add_lo12: + // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of + // S+A, with no overflow check. + return (Value & 0xfff) << 10; + + case AArch64::fixup_a64_ldst8_dtprel_lo12: + // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst8_tprel_lo12: + // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst8_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst8_lo12: + // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF + // of S+A, with no overflow check. + return (Value & 0xfff) << 10; + + case AArch64::fixup_a64_ldst16_dtprel_lo12: + // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst16_tprel_lo12: + // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst16_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst16_lo12: + // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE + // of S+A, with no overflow check. + return (Value & 0xffe) << 9; + + case AArch64::fixup_a64_ldst32_dtprel_lo12: + // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst32_tprel_lo12: + // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst32_lo12: + // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC + // of S+A, with no overflow check. + return (Value & 0xffc) << 8; + + case AArch64::fixup_a64_ldst64_dtprel_lo12: + // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst64_tprel_lo12: + // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), check 0 <= X < 2^12. 
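+  // As with the narrower LD/ST fixups, the 8-byte access size implies bits
+  // 2:0 of the offset are zero, so only bits 11:3 survive into the scaled
+  // immediate field.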
+ assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst64_lo12: + // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8 + // of S+A, with no overflow check. + return (Value & 0xff8) << 7; + + case AArch64::fixup_a64_ldst128_lo12: + // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0 + // of S+A, with no overflow check. + return (Value & 0xff0) << 6; + + case AArch64::fixup_a64_movw_uabs_g0: + // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A + // with a check that S+A < 2^16 + assert(Value <= 0xffff && "Out of range move wide fixup"); + return (Value & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g0_nc: + // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of DTPREL(S+A) with no overflow check. + case AArch64::fixup_a64_movw_gottprel_g0_nc: + // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of G(TPREL(S+A)) - GOT with no overflow check. + case AArch64::fixup_a64_movw_tprel_g0_nc: + // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of TPREL(S+A) with no overflow check. + case AArch64::fixup_a64_movw_uabs_g0_nc: + // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of + // S+A with no overflow check. + return (Value & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g1: + // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of + // S+A with a check that S+A < 2^32 + assert(Value <= 0xffffffffull && "Out of range move wide fixup"); + return ((Value >> 16) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g1_nc: + // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field + // to bits FFFF0000 of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_movw_tprel_g1_nc: + // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field + // to bits FFFF0000 of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_movw_uabs_g1_nc: + // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits + // FFFF0000 of S+A with no overflow check. + return ((Value >> 16) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g2: + // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000 + // 0000 of S+A with a check that S+A < 2^48 + assert(Value <= 0xffffffffffffull && "Out of range move wide fixup"); + return ((Value >> 32) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g2_nc: + // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000 + // 0000 of S+A with no overflow check. + return ((Value >> 32) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g3: + // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000 + // 0000 0000 of S+A (no overflow check needed) + return ((Value >> 48) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g0: + // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field + // to bits FFFF of DTPREL(S+A). 
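+  // This MOV[NZ] group may rewrite a MOVN encoding into a MOVZ by setting
+  // bit 30 once the final value is known to be non-negative; see the
+  // fixup_a64_movw_sabs_g0 handling below.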
+ case AArch64::fixup_a64_movw_tprel_g0: + // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to + // bits FFFF of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g0: { + // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of + // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we + // should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 16) && Signed < (1LL << 16) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = (Value & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + // MCCodeEmitter should have encoded a MOVN, which is fine. + Value = (~Value & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g1: + // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field + // to bits FFFF0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_gottprel_g1: + // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field + // to bits FFFF0000 of G(TPREL(S+A)) - GOT. + case AArch64::fixup_a64_movw_tprel_g1: + // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to + // bits FFFF0000 of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g1: { + // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000 + // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we + // should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 16) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 16) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g2: + // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field + // to bits FFFF 0000 0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_tprel_g2: + // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to + // bits FFFF 0000 0000 of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g2: { + // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 + // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that + // we should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 32) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 32) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_tstbr: + // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to + // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. + assert((int64_t)Value >= -(1LL << 15) && + (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); + return (Value & 0xfffc) << (5 - 2); + + case AArch64::fixup_a64_condbr: + // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch + // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); + return (Value & 0x1ffffc) << (5 - 2); + + case AArch64::fixup_a64_uncondbr: + // R_AARCH64_JUMP26 same as below (except to a linker, possibly). 
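+  // (B and BL share the same 26-bit immediate layout, so the JUMP26 and
+  // CALL26 relocations are encoded identically here.)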
+  case AArch64::fixup_a64_call:
+    // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
+    // checking that -2^27 <= S+A-P < 2^27.
+    assert((int64_t)Value >= -(1LL << 27) &&
+           (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
+    return (Value & 0xffffffc) >> 2;
+
+  case AArch64::fixup_a64_adr_gottprel_page:
+    // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
+    // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
+  case AArch64::fixup_a64_tlsdesc_adr_page:
+    // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
+    // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
+  case AArch64::fixup_a64_adr_prel_got_page:
+    // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
+    // 1FFFFF000 of the operation, checking that
+    // -2^32 <= Page(G(S))-Page(GOT) < 2^32.
+    assert((int64_t)Value >= -(1LL << 32) &&
+           (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+    return ADRImmBits((Value & 0x1fffff000) >> 12);
+
+  case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+    // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
+    // of X, with no overflow check. Check that X & 7 == 0.
+  case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+    // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
+    // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
+  case AArch64::fixup_a64_ld64_got_lo12_nc:
+    // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
+    // G(S) with no overflow check. Check X & 7 == 0
+    assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
+    return (Value & 0xff8) << 7;
+
+  case AArch64::fixup_a64_tlsdesc_call:
+    // R_AARCH64_TLSDESC_CALL: For relaxation only.
+    return 0;
+  }
+}
+
+MCAsmBackend *
+llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+  Triple TheTriple(TT);
+
+  return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h
new file mode 100644
index 0000000..b71eb0d
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h
@@ -0,0 +1,779 @@
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the AArch64 target useful for the compiler back-end and the MC
+// libraries. As such, it deliberately does not include references to LLVM
+// core code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_BASEINFO_H
+#define LLVM_AARCH64_BASEINFO_H
+
+#include "AArch64MCTargetDesc.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to AArch64 condition codes
+namespace A64CC {
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
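+  // The encoding deliberately pairs each condition with its logical inverse
+  // at (code ^ 1): EQ = 0b0000 / NE = 0b0001, GE = 0b1010 / LT = 0b1011, and
+  // so on, which is why A64InvertCondCode below only needs to flip the low
+  // bit.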
+  enum CondCodes {   // Meaning (integer)          Meaning (floating-point)
+    EQ = 0,        // Equal                      Equal
+    NE,            // Not equal                  Not equal, or unordered
+    HS,            // Unsigned higher or same    >, ==, or unordered
+    LO,            // Unsigned lower             Less than
+    MI,            // Minus, negative            Less than
+    PL,            // Plus, positive or zero     >, ==, or unordered
+    VS,            // Overflow                   Unordered
+    VC,            // No overflow                Ordered
+    HI,            // Unsigned higher            Greater than, or unordered
+    LS,            // Unsigned lower or same     Less than or equal
+    GE,            // Greater than or equal      Greater than or equal
+    LT,            // Less than                  Less than, or unordered
+    GT,            // Signed greater than        Greater than
+    LE,            // Signed less than or equal  <, ==, or unordered
+    AL,            // Always (unconditional)     Always (unconditional)
+    NV,            // Always (unconditional)     Always (unconditional)
+    // Note the NV exists purely to disassemble 0b1111. Execution
+    // is "always".
+    Invalid
+  };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+  switch (CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case A64CC::EQ: return "eq";
+  case A64CC::NE: return "ne";
+  case A64CC::HS: return "hs";
+  case A64CC::LO: return "lo";
+  case A64CC::MI: return "mi";
+  case A64CC::PL: return "pl";
+  case A64CC::VS: return "vs";
+  case A64CC::VC: return "vc";
+  case A64CC::HI: return "hi";
+  case A64CC::LS: return "ls";
+  case A64CC::GE: return "ge";
+  case A64CC::LT: return "lt";
+  case A64CC::GT: return "gt";
+  case A64CC::LE: return "le";
+  case A64CC::AL: return "al";
+  case A64CC::NV: return "nv";
+  }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+  return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+             .Case("eq", A64CC::EQ)
+             .Case("ne", A64CC::NE)
+             .Case("hs", A64CC::HS)
+             .Case("cs", A64CC::HS)
+             .Case("lo", A64CC::LO)
+             .Case("cc", A64CC::LO)
+             .Case("mi", A64CC::MI)
+             .Case("pl", A64CC::PL)
+             .Case("vs", A64CC::VS)
+             .Case("vc", A64CC::VC)
+             .Case("hi", A64CC::HI)
+             .Case("ls", A64CC::LS)
+             .Case("ge", A64CC::GE)
+             .Case("lt", A64CC::LT)
+             .Case("gt", A64CC::GT)
+             .Case("le", A64CC::LE)
+             .Case("al", A64CC::AL)
+             .Case("nv", A64CC::NV)
+             .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+  // It turns out that the condition codes have been designed so that in order
+  // to reverse the intent of the condition you only have to invert the low
+  // bit:
+
+  return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing.
+/// This central location means that the information for those transformations
+/// is not duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to
+/// work out just how often these instructions are emitted before working on
+/// it. It might even be optimal to just reorder the tables for the common
+/// instructions rather than changing the algorithm.
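+///
+/// As a usage sketch (illustrative only; it assumes the pair tables map the
+/// usual lowercase mnemonics), a barrier operand could round-trip like so:
+///   bool Valid;
+///   A64DB::DBarrierMapper DBM;
+///   uint32_t Enc = DBM.fromString("ish", Valid);     // expect A64DB::ISH
+///   StringRef Str = DBM.toString(A64DB::ISH, Valid); // expect "ish"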
+struct NamedImmMapper { + struct Mapping { + const char *Name; + uint32_t Value; + }; + + template + NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + + StringRef toString(uint32_t Value, bool &Valid) const; + uint32_t fromString(StringRef Name, bool &Valid) const; + + /// Many of the instructions allow an alternative assembly form consisting of + /// a simple immediate. Currently the only valid forms are ranges [0, N) where + /// N being 0 indicates no immediate syntax-form is allowed. + bool validImm(uint32_t Value) const; +protected: + const Mapping *Pairs; + size_t NumPairs; + uint32_t TooBigImm; +}; + +namespace A64AT { + enum ATValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + S1E1R = 0x43c0, // 01 000 0111 1000 000 + S1E2R = 0x63c0, // 01 100 0111 1000 000 + S1E3R = 0x73c0, // 01 110 0111 1000 000 + S1E1W = 0x43c1, // 01 000 0111 1000 001 + S1E2W = 0x63c1, // 01 100 0111 1000 001 + S1E3W = 0x73c1, // 01 110 0111 1000 001 + S1E0R = 0x43c2, // 01 000 0111 1000 010 + S1E0W = 0x43c3, // 01 000 0111 1000 011 + S12E1R = 0x63c4, // 01 100 0111 1000 100 + S12E1W = 0x63c5, // 01 100 0111 1000 101 + S12E0R = 0x63c6, // 01 100 0111 1000 110 + S12E0W = 0x63c7 // 01 100 0111 1000 111 + }; + + struct ATMapper : NamedImmMapper { + const static Mapping ATPairs[]; + + ATMapper(); + }; + +} +namespace A64DB { + enum DBValues { + Invalid = -1, + OSHLD = 0x1, + OSHST = 0x2, + OSH = 0x3, + NSHLD = 0x5, + NSHST = 0x6, + NSH = 0x7, + ISHLD = 0x9, + ISHST = 0xa, + ISH = 0xb, + LD = 0xd, + ST = 0xe, + SY = 0xf + }; + + struct DBarrierMapper : NamedImmMapper { + const static Mapping DBarrierPairs[]; + + DBarrierMapper(); + }; +} + +namespace A64DC { + enum DCValues { + Invalid = -1, // Op1 CRn CRm Op2 + ZVA = 0x5ba1, // 01 011 0111 0100 001 + IVAC = 0x43b1, // 01 000 0111 0110 001 + ISW = 0x43b2, // 01 000 0111 0110 010 + CVAC = 0x5bd1, // 01 011 0111 1010 001 + CSW = 0x43d2, // 01 000 0111 1010 010 + CVAU = 0x5bd9, // 01 011 0111 1011 001 + CIVAC = 0x5bf1, // 01 011 0111 1110 001 + CISW = 0x43f2 // 01 000 0111 1110 010 + }; + + struct DCMapper : NamedImmMapper { + const static Mapping DCPairs[]; + + DCMapper(); + }; + +} + +namespace A64IC { + enum ICValues { + Invalid = -1, // Op1 CRn CRm Op2 + IALLUIS = 0x0388, // 000 0111 0001 000 + IALLU = 0x03a8, // 000 0111 0101 000 + IVAU = 0x1ba9 // 011 0111 0101 001 + }; + + + struct ICMapper : NamedImmMapper { + const static Mapping ICPairs[]; + + ICMapper(); + }; + + static inline bool NeedsRegister(ICValues Val) { + return Val == IVAU; + } +} + +namespace A64ISB { + enum ISBValues { + Invalid = -1, + SY = 0xf + }; + struct ISBMapper : NamedImmMapper { + const static Mapping ISBPairs[]; + + ISBMapper(); + }; +} + +namespace A64PRFM { + enum PRFMValues { + Invalid = -1, + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 + }; + + struct PRFMMapper : NamedImmMapper { + const static Mapping PRFMPairs[]; + + PRFMMapper(); + }; +} + +namespace A64PState { + enum PStateValues { + Invalid = -1, + SPSel = 0x05, + DAIFSet = 0x1e, + DAIFClr = 0x1f + }; + + struct PStateMapper : NamedImmMapper { + const static Mapping PStatePairs[]; + + PStateMapper(); + }; + +} + +namespace A64SE { + enum ShiftExtSpecifiers { + Invalid = -1, + LSL, + LSR, + ASR, + ROR, + + UXTB, + UXTH, + UXTW, + UXTX, + + SXTB, + SXTH, + SXTW, + 
SXTX + }; +} + +namespace A64SysReg { + enum SysRegROValues { + MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 + DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 + MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 + OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 + DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 + PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 + PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 + MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 + CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 + CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 + CTR_EL0 = 0xd801, // 11 011 0000 0000 001 + MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 + REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 + AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 + DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 + ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 + ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 + ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 + ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 + ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 + ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 + ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 + ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 + ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 + ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 + ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 + ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 + ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 + ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 + ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 + MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 + MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 + RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 + RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 + RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 + ISR_EL1 = 0xc608, // 11 000 1100 0001 000 + CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 + CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 + }; + + enum SysRegWOValues { + DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 + OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 + PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 + }; + + enum SysRegValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 + OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 + TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 + MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 + MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 + DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 + OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 + DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 + DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 + DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 + DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 + DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 + DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 + DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 + DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 + DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 + DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 + DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 + DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 + DBGBVR11_EL1 = 
0x805c, // 10 000 0000 1011 100 + DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 + DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 + DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 + DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 + DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 + DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 + DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 + DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 + DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 + DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 + DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 + DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 + DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 + DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 + DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 + DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 + DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 + DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 + DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 + DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 + DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 + DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 + DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 + DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 + DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 + DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 + DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 + DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 + DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 + DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 + DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 + DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 + DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 + DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 + DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 + DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 + DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 + DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 + DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 + DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 + DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 + DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 + DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 + DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 + DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 + DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 + DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 + DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 + DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 + DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 + DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 + DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 + TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 + OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 + DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 + DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 + DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 + CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 + VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 + VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 + CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 + SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 + SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 + SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 + ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 + ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 + ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 + HCR_EL2 = 0xe088, // 11 100 0001 0001 000 + SCR_EL3 = 0xf088, // 11 110 0001 0001 000 + MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 + SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 + CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 + CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 + HSTR_EL2 = 0xe08b, // 
11 100 0001 0001 011 + HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 + MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 + TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 + TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 + TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 + TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 + TCR_EL1 = 0xc102, // 11 000 0010 0000 010 + TCR_EL2 = 0xe102, // 11 100 0010 0000 010 + TCR_EL3 = 0xf102, // 11 110 0010 0000 010 + VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 + VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 + DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 + SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 + SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 + SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 + ELR_EL1 = 0xc201, // 11 000 0100 0000 001 + ELR_EL2 = 0xe201, // 11 100 0100 0000 001 + ELR_EL3 = 0xf201, // 11 110 0100 0000 001 + SP_EL0 = 0xc208, // 11 000 0100 0001 000 + SP_EL1 = 0xe208, // 11 100 0100 0001 000 + SP_EL2 = 0xf208, // 11 110 0100 0001 000 + SPSel = 0xc210, // 11 000 0100 0010 000 + NZCV = 0xda10, // 11 011 0100 0010 000 + DAIF = 0xda11, // 11 011 0100 0010 001 + CurrentEL = 0xc212, // 11 000 0100 0010 010 + SPSR_irq = 0xe218, // 11 100 0100 0011 000 + SPSR_abt = 0xe219, // 11 100 0100 0011 001 + SPSR_und = 0xe21a, // 11 100 0100 0011 010 + SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 + FPCR = 0xda20, // 11 011 0100 0100 000 + FPSR = 0xda21, // 11 011 0100 0100 001 + DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 + DLR_EL0 = 0xda29, // 11 011 0100 0101 001 + IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 + AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 + AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 + AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 + AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 + AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 + AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 + ESR_EL1 = 0xc290, // 11 000 0101 0010 000 + ESR_EL2 = 0xe290, // 11 100 0101 0010 000 + ESR_EL3 = 0xf290, // 11 110 0101 0010 000 + FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 + FAR_EL1 = 0xc300, // 11 000 0110 0000 000 + FAR_EL2 = 0xe300, // 11 100 0110 0000 000 + FAR_EL3 = 0xf300, // 11 110 0110 0000 000 + HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 + PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 + PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 + PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 + PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 + PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 + PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 + PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 + PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 + PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 + PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 + PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 + PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 + PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 + MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 + MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 + MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 + AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 + AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 + AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 + VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 + VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 + VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 + RMR_EL1 = 0xc602, // 11 000 1100 0000 010 + RMR_EL2 = 0xe602, // 11 100 1100 0000 010 + RMR_EL3 = 0xf602, // 11 110 1100 0000 010 + CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 + TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 + TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 + TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 + TPIDRRO_EL0 = 0xde83, 
// 11 011 1101 0000 011 + TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 + CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 + CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 + CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 + CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 + CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 + CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 + CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 + CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 + CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 + CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 + CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 + CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 + CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 + CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 + CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 + CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 + PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 + PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 + PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 + PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 + PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 + PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 + PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 + PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 + PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 + PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 + PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 + PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 + PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 + PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 + PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 + PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 + PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 + PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 + PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 + PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 + PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 + PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 + PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 + PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 + PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 + PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 + PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 + PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 + PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 + PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 + PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 + PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 + PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 + PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 + PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 + PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 + PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 + PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 + PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 + PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 + PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 + PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 + PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 + PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 + PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 + PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 + PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 + PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 + PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 + PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 + PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 + PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 + PMEVTYPER20_EL0 = 0xdf74, // 11 011 
1110 1110 100 + PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 + PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 + PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 + PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 + PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 + PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 + PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 + PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 + PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 + PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 + }; + + // Note that these do not inherit from NamedImmMapper. This class is + // sufficiently different in its behaviour that I don't believe it's worth + // burdening the common NamedImmMapper with abstractions only needed in + // this one case. + struct SysRegMapper { + static const NamedImmMapper::Mapping SysRegPairs[]; + + const NamedImmMapper::Mapping *InstPairs; + size_t NumInstPairs; + + SysRegMapper() {} + uint32_t fromString(StringRef Name, bool &Valid) const; + std::string toString(uint32_t Bits, bool &Valid) const; + }; + + struct MSRMapper : SysRegMapper { + static const NamedImmMapper::Mapping MSRPairs[]; + MSRMapper(); + }; + + struct MRSMapper : SysRegMapper { + static const NamedImmMapper::Mapping MRSPairs[]; + MRSMapper(); + }; + + uint32_t ParseGenericRegister(StringRef Name, bool &Valid); +} + +namespace A64TLBI { + enum TLBIValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 + IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 + VMALLE1IS = 0x4418, // 01 000 1000 0011 000 + ALLE2IS = 0x6418, // 01 100 1000 0011 000 + ALLE3IS = 0x7418, // 01 110 1000 0011 000 + VAE1IS = 0x4419, // 01 000 1000 0011 001 + VAE2IS = 0x6419, // 01 100 1000 0011 001 + VAE3IS = 0x7419, // 01 110 1000 0011 001 + ASIDE1IS = 0x441a, // 01 000 1000 0011 010 + VAAE1IS = 0x441b, // 01 000 1000 0011 011 + ALLE1IS = 0x641c, // 01 100 1000 0011 100 + VALE1IS = 0x441d, // 01 000 1000 0011 101 + VALE2IS = 0x641d, // 01 100 1000 0011 101 + VALE3IS = 0x741d, // 01 110 1000 0011 101 + VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 + VAALE1IS = 0x441f, // 01 000 1000 0011 111 + IPAS2E1 = 0x6421, // 01 100 1000 0100 001 + IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 + VMALLE1 = 0x4438, // 01 000 1000 0111 000 + ALLE2 = 0x6438, // 01 100 1000 0111 000 + ALLE3 = 0x7438, // 01 110 1000 0111 000 + VAE1 = 0x4439, // 01 000 1000 0111 001 + VAE2 = 0x6439, // 01 100 1000 0111 001 + VAE3 = 0x7439, // 01 110 1000 0111 001 + ASIDE1 = 0x443a, // 01 000 1000 0111 010 + VAAE1 = 0x443b, // 01 000 1000 0111 011 + ALLE1 = 0x643c, // 01 100 1000 0111 100 + VALE1 = 0x443d, // 01 000 1000 0111 101 + VALE2 = 0x643d, // 01 100 1000 0111 101 + VALE3 = 0x743d, // 01 110 1000 0111 101 + VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 + VAALE1 = 0x443f // 01 000 1000 0111 111 + }; + + struct TLBIMapper : NamedImmMapper { + const static Mapping TLBIPairs[]; + + TLBIMapper(); + }; + + static inline bool NeedsRegister(TLBIValues Val) { + switch (Val) { + case VMALLE1IS: + case ALLE2IS: + case ALLE3IS: + case ALLE1IS: + case VMALLS12E1IS: + case VMALLE1: + case ALLE2: + case ALLE3: + case ALLE1: + case VMALLS12E1: + return false; + default: + return true; + } + } +} + +namespace AArch64II { + + enum TOF { + //===--------------------------------------------------------------===// + // AArch64 Specific MachineOperand flags. + + MO_NO_FLAG, + + // MO_GOT - Represents a relocation referring to the GOT entry of a given + // symbol. Used in adrp. 
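+    // For example, materializing the address of a global G through the GOT
+    // is conventionally written "adrp x0, :got:G" (an MO_GOT reference)
+    // followed by "ldr x0, [x0, #:got_lo12:G]" (MO_GOT_LO12).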
+    MO_GOT,
+
+    // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of
+    // the GOT entry of a given symbol. Used in ldr only.
+    MO_GOT_LO12,
+
+    // MO_DTPREL_* - Represents a relocation referring to the offset from a
+    // module's dynamic thread pointer. Used in the local-dynamic TLS access
+    // model.
+    MO_DTPREL_G1,
+    MO_DTPREL_G0_NC,
+
+    // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
+    // providing the offset of a variable from the thread-pointer. Used in
+    // initial-exec TLS model where this offset is assigned in the static
+    // thread block and thus known by the dynamic linker.
+    MO_GOTTPREL,
+    MO_GOTTPREL_LO12,
+
+    // MO_TLSDESC_* - Represents a relocation referring to a GOT entry
+    // providing a TLS descriptor chosen by the dynamic linker. Used for the
+    // general-dynamic and local-dynamic TLS access models where very little
+    // is known at link-time.
+    MO_TLSDESC,
+    MO_TLSDESC_LO12,
+
+    // MO_TPREL_* - Represents a relocation referring to the offset of a
+    // variable from the thread pointer itself. Used in the local-exec TLS
+    // access model.
+    MO_TPREL_G1,
+    MO_TPREL_G0_NC,
+
+    // MO_LO12 - On a symbol operand, this represents a relocation containing
+    // lower 12 bits of the address. Used in add/sub/ldr/str.
+    MO_LO12
+  };
+}
+
+class APFloat;
+
+namespace A64Imms {
+  bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+  inline bool isFPImm(const APFloat &Val) {
+    uint32_t Imm8;
+    return isFPImm(Val, Imm8);
+  }
+
+  bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+  bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+  bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+  bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+  // We sometimes want to know whether the immediate is representable with a
+  // MOVN but *not* with a MOVZ (because that would take priority).
+  bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
new file mode 100644
index 0000000..476b94e
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -0,0 +1,287 @@
+//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { +class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { +public: + AArch64ELFObjectWriter(uint8_t OSABI); + + virtual ~AArch64ELFObjectWriter(); + +protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; +private: +}; +} + +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, + /*HasRelocationAddend*/ true) +{} + +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() +{} + +unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_PREL64; + case FK_Data_4: + return ELF::R_AARCH64_PREL32; + case FK_Data_2: + return ELF::R_AARCH64_PREL16; + case AArch64::fixup_a64_ld_prel: + Type = ELF::R_AARCH64_LD_PREL_LO19; + break; + case AArch64::fixup_a64_adr_prel: + Type = ELF::R_AARCH64_ADR_PREL_LO21; + break; + case AArch64::fixup_a64_adr_prel_page: + Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; + break; + case AArch64::fixup_a64_adr_prel_got_page: + Type = ELF::R_AARCH64_ADR_GOT_PAGE; + break; + case AArch64::fixup_a64_tstbr: + Type = ELF::R_AARCH64_TSTBR14; + break; + case AArch64::fixup_a64_condbr: + Type = ELF::R_AARCH64_CONDBR19; + break; + case AArch64::fixup_a64_uncondbr: + Type = ELF::R_AARCH64_JUMP26; + break; + case AArch64::fixup_a64_call: + Type = ELF::R_AARCH64_CALL26; + break; + case AArch64::fixup_a64_adr_gottprel_page: + Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; + break; + case AArch64::fixup_a64_ld_gottprel_prel19: + Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; + break; + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_ABS64; + case FK_Data_4: + return ELF::R_AARCH64_ABS32; + case FK_Data_2: + return ELF::R_AARCH64_ABS16; + case AArch64::fixup_a64_add_lo12: + Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ld64_got_lo12_nc: + Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_lo12: + Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_lo12: + Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_lo12: + Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_lo12: + Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst128_lo12: + Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; + break; + case AArch64::fixup_a64_movw_uabs_g0: + Type = ELF::R_AARCH64_MOVW_UABS_G0; + break; + case AArch64::fixup_a64_movw_uabs_g0_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G0_NC; + break; + case AArch64::fixup_a64_movw_uabs_g1: + Type = ELF::R_AARCH64_MOVW_UABS_G1; + break; + case AArch64::fixup_a64_movw_uabs_g1_nc: + Type = 
ELF::R_AARCH64_MOVW_UABS_G1_NC; + break; + case AArch64::fixup_a64_movw_uabs_g2: + Type = ELF::R_AARCH64_MOVW_UABS_G2; + break; + case AArch64::fixup_a64_movw_uabs_g2_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; + break; + case AArch64::fixup_a64_movw_uabs_g3: + Type = ELF::R_AARCH64_MOVW_UABS_G3; + break; + case AArch64::fixup_a64_movw_sabs_g0: + Type = ELF::R_AARCH64_MOVW_SABS_G0; + break; + case AArch64::fixup_a64_movw_sabs_g1: + Type = ELF::R_AARCH64_MOVW_SABS_G1; + break; + case AArch64::fixup_a64_movw_sabs_g2: + Type = ELF::R_AARCH64_MOVW_SABS_G2; + break; + + // TLS Local-dynamic block + case AArch64::fixup_a64_movw_dtprel_g2: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; + break; + case AArch64::fixup_a64_movw_dtprel_g1: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; + break; + case AArch64::fixup_a64_movw_dtprel_g1_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_dtprel_g0: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; + break; + case AArch64::fixup_a64_movw_dtprel_g0_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; + break; + case AArch64::fixup_a64_add_dtprel_hi12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; + break; + case AArch64::fixup_a64_add_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; + break; + case AArch64::fixup_a64_add_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; + break; + + // TLS initial-exec block + case AArch64::fixup_a64_movw_gottprel_g1: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; + break; + case AArch64::fixup_a64_movw_gottprel_g0_nc: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; + break; + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + break; + + // TLS local-exec block + case AArch64::fixup_a64_movw_tprel_g2: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; + break; + case AArch64::fixup_a64_movw_tprel_g1: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; + break; + case AArch64::fixup_a64_movw_tprel_g1_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_tprel_g0: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; + break; + case AArch64::fixup_a64_movw_tprel_g0_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; + break; + case AArch64::fixup_a64_add_tprel_hi12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; + break; + case AArch64::fixup_a64_add_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; + break; + case AArch64::fixup_a64_add_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; + break; + case 
AArch64::fixup_a64_ldst8_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + break; + + // TLS general-dynamic block + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_add_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_call: + Type = ELF::R_AARCH64_TLSDESC_CALL; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp new file mode 100644 index 0000000..b83577a --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -0,0 +1,160 @@ +//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits AArch64 ELF .o object files. Different +// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit +// regions of data and code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// AArch64 ELF ABI: +/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf +/// +/// In brief: $x or $d should be emitted at the start of each contiguous region +/// of A64 code or data in a section. 
In practice, this emission does not rely +/// on explicit assembler directives but on inherent properties of the +/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an +/// instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! +class AArch64ELFStreamer : public MCELFStreamer { +public: + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, OS, Emitter), + MappingSymbolCounter(0), LastEMS(EMS_None) { + } + + ~AArch64ELFStreamer() {} + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + EmitA64MappingSymbol(); + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_A64, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitA64MappingSymbol() { + if (LastEMS == EMS_A64) return; + EmitMappingSymbol("$x"); + LastEMS = EMS_A64; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." 
+ + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + int64_t MappingSymbolCounter; + + DenseMap LastMappingSymbols; + ElfMappingSymbol LastEMS; + + /// @} +}; +} + +namespace llvm { + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } +} + + diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h new file mode 100644 index 0000000..5a89ca5 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -0,0 +1,27 @@ +//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_ELF_STREAMER_H +#define LLVM_AARCH64_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); +} + +#endif // AArch64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h new file mode 100644 index 0000000..15e0886 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -0,0 +1,108 @@ +//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H +#define LLVM_AARCH64_AARCH64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { + namespace AArch64 { + enum Fixups { + fixup_a64_ld_prel = FirstTargetFixupKind, + fixup_a64_adr_prel, + fixup_a64_adr_prel_page, + + fixup_a64_add_lo12, + + fixup_a64_ldst8_lo12, + fixup_a64_ldst16_lo12, + fixup_a64_ldst32_lo12, + fixup_a64_ldst64_lo12, + fixup_a64_ldst128_lo12, + + fixup_a64_tstbr, + fixup_a64_condbr, + fixup_a64_uncondbr, + fixup_a64_call, + + fixup_a64_movw_uabs_g0, + fixup_a64_movw_uabs_g0_nc, + fixup_a64_movw_uabs_g1, + fixup_a64_movw_uabs_g1_nc, + fixup_a64_movw_uabs_g2, + fixup_a64_movw_uabs_g2_nc, + fixup_a64_movw_uabs_g3, + + fixup_a64_movw_sabs_g0, + fixup_a64_movw_sabs_g1, + fixup_a64_movw_sabs_g2, + + fixup_a64_adr_prel_got_page, + fixup_a64_ld64_got_lo12_nc, + + // Produce offsets relative to the module's dynamic TLS area. 
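+      // (These back assembler modifiers such as ":dtprel_g1:" and
+      // ":dtprel_lo12:" on movz/movk and add operands; the _nc variants
+      // correspond to the no-overflow-check relocations.)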
+ fixup_a64_movw_dtprel_g2, + fixup_a64_movw_dtprel_g1, + fixup_a64_movw_dtprel_g1_nc, + fixup_a64_movw_dtprel_g0, + fixup_a64_movw_dtprel_g0_nc, + fixup_a64_add_dtprel_hi12, + fixup_a64_add_dtprel_lo12, + fixup_a64_add_dtprel_lo12_nc, + fixup_a64_ldst8_dtprel_lo12, + fixup_a64_ldst8_dtprel_lo12_nc, + fixup_a64_ldst16_dtprel_lo12, + fixup_a64_ldst16_dtprel_lo12_nc, + fixup_a64_ldst32_dtprel_lo12, + fixup_a64_ldst32_dtprel_lo12_nc, + fixup_a64_ldst64_dtprel_lo12, + fixup_a64_ldst64_dtprel_lo12_nc, + + // Produce the GOT entry containing a variable's address in TLS's + // initial-exec mode. + fixup_a64_movw_gottprel_g1, + fixup_a64_movw_gottprel_g0_nc, + fixup_a64_adr_gottprel_page, + fixup_a64_ld64_gottprel_lo12_nc, + fixup_a64_ld_gottprel_prel19, + + // Produce offsets relative to the thread pointer: TPIDR_EL0. + fixup_a64_movw_tprel_g2, + fixup_a64_movw_tprel_g1, + fixup_a64_movw_tprel_g1_nc, + fixup_a64_movw_tprel_g0, + fixup_a64_movw_tprel_g0_nc, + fixup_a64_add_tprel_hi12, + fixup_a64_add_tprel_lo12, + fixup_a64_add_tprel_lo12_nc, + fixup_a64_ldst8_tprel_lo12, + fixup_a64_ldst8_tprel_lo12_nc, + fixup_a64_ldst16_tprel_lo12, + fixup_a64_ldst16_tprel_lo12_nc, + fixup_a64_ldst32_tprel_lo12, + fixup_a64_ldst32_tprel_lo12_nc, + fixup_a64_ldst64_tprel_lo12, + fixup_a64_ldst64_tprel_lo12_nc, + + // Produce the special fixups used by the general-dynamic TLS model. + fixup_a64_tlsdesc_adr_page, + fixup_a64_tlsdesc_ld64_lo12_nc, + fixup_a64_tlsdesc_add_lo12_nc, + fixup_a64_tlsdesc_call, + + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; + } +} + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp new file mode 100644 index 0000000..8ec8cbf --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -0,0 +1,41 @@ +//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the AArch64MCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "AArch64MCAsmInfo.h" + +using namespace llvm; + +AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() { + PointerSize = 8; + + // ".comm align is in bytes but .align is pow-2." + AlignmentIsInBytes = false; + + CommentString = "//"; + PrivateGlobalPrefix = ".L"; + Code32Directive = ".code\t32"; + + Data16bitsDirective = "\t.hword\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.xword\t"; + + UseDataRegionDirectives = true; + + WeakRefDirective = "\t.weak\t"; + + HasLEB128 = true; + SupportsDebugInformation = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h new file mode 100644 index 0000000..a20bc47 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -0,0 +1,27 @@ +//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the AArch64MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETASMINFO_H +#define LLVM_AARCH64TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + + struct AArch64ELFMCAsmInfo : public MCAsmInfo { + explicit AArch64ELFMCAsmInfo(); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp new file mode 100644 index 0000000..f2bbd85 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -0,0 +1,517 @@ +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64MCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class AArch64MCCodeEmitter : public MCCodeEmitter { + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; + MCContext &Ctx; + +public: + AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti), Ctx(ctx) { + } + + ~AArch64MCCodeEmitter() {} + + unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + + unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + + template + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const { + return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize); + } + + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + int MemSize) const; + + unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + + + // Labels are handled mostly the same way: a symbol is needed, and + // just gets some fixup attached. + template + unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + + unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + + + unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const; + + + unsigned getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl &Fixups) const; + + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. 
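+  // (The body is emitted by TableGen from the target's .td instruction
+  // definitions, presumably into the AArch64Gen*.inc output included by the
+  // implementation file; only the declaration lives here.)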
+ uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl &Fixups) const; + + + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitInstruction(uint32_t Val, raw_ostream &OS) const { + // Output the constant in little endian byte order. + for (unsigned i = 0; i != 4; ++i) { + EmitByte(Val & 0xff, OS); + Val >>= 8; + } + } + + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const; + + unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const; + + template unsigned + fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const; + + +}; + +} // end anonymous namespace + +unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl &Fixups) const { + if (!MO.isExpr()) { + // This can occur for manually decoded or constructed MCInsts, but neither + // the assembly-parser nor instruction selection will currently produce an + // MCInst that's not a symbol reference. + assert(MO.isImm() && "Unexpected address requested"); + return MO.getImm(); + } + + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(FixupKind); + Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + + return 0; +} + +unsigned AArch64MCCodeEmitter:: +getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + int MemSize) const { + const MCOperand &ImmOp = MI.getOperand(OpIdx); + if (ImmOp.isImm()) + return ImmOp.getImm(); + + assert(ImmOp.isExpr() && "Unexpected operand type"); + const AArch64MCExpr *Expr = cast(ImmOp.getExpr()); + unsigned FixupKind; + + + switch (Expr->getKind()) { + default: llvm_unreachable("Unexpected operand modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, + AArch64::fixup_a64_ldst16_lo12, + AArch64::fixup_a64_ldst32_lo12, + AArch64::fixup_a64_ldst64_lo12, + AArch64::fixup_a64_ldst128_lo12 }; + assert(MemSize <= 16 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOT_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12, + AArch64::fixup_a64_ldst16_dtprel_lo12, + AArch64::fixup_a64_ldst32_dtprel_lo12, + AArch64::fixup_a64_ldst64_dtprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc, + AArch64::fixup_a64_ldst16_dtprel_lo12_nc, + AArch64::fixup_a64_ldst32_dtprel_lo12_nc, + AArch64::fixup_a64_ldst64_dtprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; + break; + case 
AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12, + AArch64::fixup_a64_ldst16_tprel_lo12, + AArch64::fixup_a64_ldst32_tprel_lo12, + AArch64::fixup_a64_ldst64_tprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc, + AArch64::fixup_a64_ldst16_tprel_lo12_nc, + AArch64::fixup_a64_ldst32_tprel_lo12_nc, + AArch64::fixup_a64_ldst64_tprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; + break; + } + + return getAddressWithFixup(ImmOp, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast(MO.getImm()); + + assert(MO.isExpr()); + + unsigned FixupKind = 0; + switch(cast(MO.getExpr())->getKind()) { + default: llvm_unreachable("Invalid expression modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: + FixupKind = AArch64::fixup_a64_add_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: + FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_HI12: + FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12: + FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast(MO.getImm()); + + assert(MO.isExpr()); + + unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; + if (const AArch64MCExpr *Expr = dyn_cast(MO.getExpr())) + Modifier = Expr->getKind(); + + unsigned FixupKind = 0; + switch(Modifier) { + case AArch64MCExpr::VK_AARCH64_None: + FixupKind = AArch64::fixup_a64_adr_prel_page; + break; + case AArch64MCExpr::VK_AARCH64_GOT: + FixupKind = AArch64::fixup_a64_adr_prel_got_page; + break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL: + FixupKind = AArch64::fixup_a64_adr_gottprel_page; + break; + case AArch64MCExpr::VK_AARCH64_TLSDESC: + FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; + break; + default: + llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; +} + +unsigned 
+AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; +} + + +template unsigned +AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isExpr()) + return getAddressWithFixup(MO, fixupDesired, Fixups); + + assert(MO.isImm()); + return MO.getImm(); +} + +unsigned +AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isImm()) + return MO.getImm(); + + assert(MO.isExpr()); + + unsigned FixupKind; + if (isa(MO.getExpr())) { + assert(dyn_cast(MO.getExpr())->getKind() + == AArch64MCExpr::VK_AARCH64_GOTTPREL + && "Invalid symbol modifier for literal load"); + FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; + } else { + FixupKind = AArch64::fixup_a64_ld_prel; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + + +unsigned +AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl &Fixups) const { + if (MO.isReg()) { + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + } else if (MO.isImm()) { + return static_cast(MO.getImm()); + } + + llvm_unreachable("Unable to encode MCOperand!"); + return 0; +} + +unsigned +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups) const { + const MCOperand &UImm16MO = MI.getOperand(OpIdx); + const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); + + unsigned Result = static_cast(ShiftMO.getImm()) << 16; + + if (UImm16MO.isImm()) { + Result |= UImm16MO.getImm(); + return Result; + } + + const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); + AArch64::Fixups requestedFixup; + switch (A64E->getKind()) { + default: llvm_unreachable("unexpected expression modifier"); + case AArch64MCExpr::VK_AARCH64_ABS_G0: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; + case AArch64MCExpr::VK_AARCH64_SABS_G0: + requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; + case AArch64MCExpr::VK_AARCH64_SABS_G1: + requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; + case AArch64MCExpr::VK_AARCH64_SABS_G2: + requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; + case 
AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; + } + + return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups); +} + +unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI, + unsigned EncodedValue) const { + // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation + // with 0s, but is architecturally ignored + EncodedValue &= ~0x1f0000u; + + return EncodedValue; +} + +template unsigned +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue) const { + if (!hasRs) EncodedValue |= 0x001F0000; + if (!hasRt2) EncodedValue |= 0x00007C00; + + return EncodedValue; +} + +unsigned +AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const { + // If one of the signed fixup kinds is applied to a MOVZ instruction, the + // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's + // job to ensure that any bits possibly affected by this are 0. This means we + // must zero out bit 30 (essentially emitting a MOVN). + MCOperand UImm16MO = MI.getOperand(1); + + // Nothing to do if there's no fixup. + if (UImm16MO.isImm()) + return EncodedValue; + + const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); + switch (A64E->getKind()) { + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + return EncodedValue & ~(1u << 30); + default: + // Nothing to do for an unsigned fixup. + return EncodedValue; + } + + llvm_unreachable("Should have returned by now"); +} + +unsigned +AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, + unsigned EncodedValue) const { + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + EncodedValue |= 0x1f << 10; + return EncodedValue; +} + +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(MCII, STI, Ctx); +} + +void AArch64MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { + // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the + // following (BLR) instruction. It doesn't emit any code itself so it + // doesn't go through the normal TableGenerated channels. 
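  // For reference, a sketch of the code sequence this directive annotates
  // (the shape is fixed by the AArch64 ELF TLS design; shown here only to
  // illustrate where the R_AARCH64_TLSDESC_CALL relocation lands):
  //
  //   adrp  x0, :tlsdesc:var
  //   ldr   x1, [x0, #:tlsdesc_lo12:var]
  //   add   x0, x0, #:tlsdesc_lo12:var
  //   .tlsdesccall var      // lowers to TLSDESCCALL, relocating the BLR
  //   blr   x1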
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); + const MCExpr *Expr; + Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); + Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); + return; + } + + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + + EmitInstruction(Binary, OS); +} + + +#include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp new file mode 100644 index 0000000..e86e04a --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -0,0 +1,173 @@ +//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64mcexpr" +#include "AArch64MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/Object/ELF.h" + +using namespace llvm; + +const AArch64MCExpr* +AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) AArch64MCExpr(Kind, Expr); +} + +void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_AARCH64_GOT: OS << ":got:"; break; + case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; + case VK_AARCH64_LO12: OS << ":lo12:"; break; + case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; + case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; + case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; + case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; + case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; + case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; + case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; + case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break; + case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; + case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; + case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; + case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; + case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; + case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; + case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; + case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; + case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; + case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; + case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; + case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; + case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; + case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; + case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; + case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; + case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; + case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; + case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; + case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; + case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; + case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break; + case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; + case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; + + } + + const MCExpr *Expr = getSubExpr(); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << '('; + 
Expr->print(OS); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << ')'; +} + +bool +AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + return getSubExpr()->EvaluateAsRelocatable(Res, *Layout); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expression"); + break; + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast(Expr); + MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); + MCELF::SetType(SD, ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + default: + return; + case VK_AARCH64_DTPREL_G2: + case VK_AARCH64_DTPREL_G1: + case VK_AARCH64_DTPREL_G1_NC: + case VK_AARCH64_DTPREL_G0: + case VK_AARCH64_DTPREL_G0_NC: + case VK_AARCH64_DTPREL_HI12: + case VK_AARCH64_DTPREL_LO12: + case VK_AARCH64_DTPREL_LO12_NC: + case VK_AARCH64_GOTTPREL_G1: + case VK_AARCH64_GOTTPREL_G0_NC: + case VK_AARCH64_GOTTPREL: + case VK_AARCH64_GOTTPREL_LO12: + case VK_AARCH64_TPREL_G2: + case VK_AARCH64_TPREL_G1: + case VK_AARCH64_TPREL_G1_NC: + case VK_AARCH64_TPREL_G0: + case VK_AARCH64_TPREL_G0_NC: + case VK_AARCH64_TPREL_HI12: + case VK_AARCH64_TPREL_LO12: + case VK_AARCH64_TPREL_LO12_NC: + case VK_AARCH64_TLSDESC: + case VK_AARCH64_TLSDESC_LO12: + break; + } + + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +// FIXME: really do above: now that two backends are using it. +static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Value); + AddValueSymbolsImpl(BE->getLHS(), Asm); + AddValueSymbolsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbolsImpl(cast(Value)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbolsImpl(getSubExpr(), Asm); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h new file mode 100644 index 0000000..20adc0c --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -0,0 +1,161 @@ +//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCEXPR_H +#define LLVM_AARCH64MCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class AArch64MCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_AARCH64_None, + VK_AARCH64_GOT, // :got: modifier in assembly + VK_AARCH64_GOT_LO12, // :got_lo12: + VK_AARCH64_LO12, // :lo12: + + VK_AARCH64_ABS_G0, // :abs_g0: + VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: + VK_AARCH64_ABS_G1, + VK_AARCH64_ABS_G1_NC, + VK_AARCH64_ABS_G2, + VK_AARCH64_ABS_G2_NC, + VK_AARCH64_ABS_G3, + + VK_AARCH64_SABS_G0, // :abs_g0_s: + VK_AARCH64_SABS_G1, + VK_AARCH64_SABS_G2, + + VK_AARCH64_DTPREL_G2, // :dtprel_g2: + VK_AARCH64_DTPREL_G1, + VK_AARCH64_DTPREL_G1_NC, + VK_AARCH64_DTPREL_G0, + VK_AARCH64_DTPREL_G0_NC, + VK_AARCH64_DTPREL_HI12, + VK_AARCH64_DTPREL_LO12, + VK_AARCH64_DTPREL_LO12_NC, + + VK_AARCH64_GOTTPREL_G1, // :gottprel: + VK_AARCH64_GOTTPREL_G0_NC, + VK_AARCH64_GOTTPREL, + VK_AARCH64_GOTTPREL_LO12, + + VK_AARCH64_TPREL_G2, // :tprel: + VK_AARCH64_TPREL_G1, + VK_AARCH64_TPREL_G1_NC, + VK_AARCH64_TPREL_G0, + VK_AARCH64_TPREL_G0_NC, + VK_AARCH64_TPREL_HI12, + VK_AARCH64_TPREL_LO12, + VK_AARCH64_TPREL_LO12_NC, + + VK_AARCH64_TLSDESC, // :tlsdesc: + VK_AARCH64_TLSDESC_LO12 + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_GOT, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const AArch64MCExpr *) { return true; } + +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp new file mode 100644 index 0000000..0d2855f --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -0,0 +1,991 @@ +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "AArch64MCTargetDesc.h" +#include "AArch64BaseInfo.h" +#include "AArch64ELFStreamer.h" +#include "AArch64MCAsmInfo.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" + +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Value == Value) { + Valid = true; + return Pairs[i].Name; + } + } + + Valid = false; + return StringRef(); +} + +uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { + std::string LowerCaseName = Name.lower(); + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Name == LowerCaseName) { + Valid = true; + return Pairs[i].Value; + } + } + + Valid = false; + return -1; +} + +bool NamedImmMapper::validImm(uint32_t Value) const { + return Value < TooBigImm; +} + +const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { + {"s1e1r", S1E1R}, + {"s1e2r", S1E2R}, + {"s1e3r", S1E3R}, + {"s1e1w", S1E1W}, + {"s1e2w", S1E2W}, + {"s1e3w", S1E3W}, + {"s1e0r", S1E0R}, + {"s1e0w", S1E0W}, + {"s12e1r", S12E1R}, + {"s12e1w", S12E1W}, + {"s12e0r", S12E0R}, + {"s12e0w", S12E0W}, +}; + +A64AT::ATMapper::ATMapper() + : NamedImmMapper(ATPairs, 0) {} + +const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { + {"oshld", OSHLD}, + {"oshst", OSHST}, + {"osh", OSH}, + {"nshld", NSHLD}, + {"nshst", NSHST}, + {"nsh", NSH}, + {"ishld", ISHLD}, + {"ishst", ISHST}, + {"ish", ISH}, + {"ld", LD}, + {"st", ST}, + {"sy", SY} +}; + +A64DB::DBarrierMapper::DBarrierMapper() + : NamedImmMapper(DBarrierPairs, 16u) {} + 
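// A usage sketch for the NamedImmMapper tables defined in this file
// (hypothetical driver code, assuming only the interface used above):
//
//   bool Valid;
//   A64DB::DBarrierMapper DBM;
//   uint32_t Imm = DBM.fromString("ish", Valid);   // Valid == true
//   StringRef Name = DBM.toString(Imm, Valid);     // yields "ish" again
//
// fromString lower-cases its input first, so lookups are case-insensitive;
// both directions are linear scans that report failure through Valid.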
+const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { + {"zva", ZVA}, + {"ivac", IVAC}, + {"isw", ISW}, + {"cvac", CVAC}, + {"csw", CSW}, + {"cvau", CVAU}, + {"civac", CIVAC}, + {"cisw", CISW} +}; + +A64DC::DCMapper::DCMapper() + : NamedImmMapper(DCPairs, 0) {} + +const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { + {"ialluis", IALLUIS}, + {"iallu", IALLU}, + {"ivau", IVAU} +}; + +A64IC::ICMapper::ICMapper() + : NamedImmMapper(ICPairs, 0) {} + +const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { + {"sy", SY}, +}; + +A64ISB::ISBMapper::ISBMapper() + : NamedImmMapper(ISBPairs, 16) {} + +const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { + {"pldl1keep", PLDL1KEEP}, + {"pldl1strm", PLDL1STRM}, + {"pldl2keep", PLDL2KEEP}, + {"pldl2strm", PLDL2STRM}, + {"pldl3keep", PLDL3KEEP}, + {"pldl3strm", PLDL3STRM}, + {"pstl1keep", PSTL1KEEP}, + {"pstl1strm", PSTL1STRM}, + {"pstl2keep", PSTL2KEEP}, + {"pstl2strm", PSTL2STRM}, + {"pstl3keep", PSTL3KEEP}, + {"pstl3strm", PSTL3STRM} +}; + +A64PRFM::PRFMMapper::PRFMMapper() + : NamedImmMapper(PRFMPairs, 32) {} + +const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { + {"spsel", SPSel}, + {"daifset", DAIFSet}, + {"daifclr", DAIFClr} +}; + +A64PState::PStateMapper::PStateMapper() + : NamedImmMapper(PStatePairs, 0) {} + +const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { + {"mdccsr_el0", MDCCSR_EL0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0}, + {"mdrar_el1", MDRAR_EL1}, + {"oslsr_el1", OSLSR_EL1}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, + {"pmceid0_el0", PMCEID0_EL0}, + {"pmceid1_el0", PMCEID1_EL0}, + {"midr_el1", MIDR_EL1}, + {"ccsidr_el1", CCSIDR_EL1}, + {"clidr_el1", CLIDR_EL1}, + {"ctr_el0", CTR_EL0}, + {"mpidr_el1", MPIDR_EL1}, + {"revidr_el1", REVIDR_EL1}, + {"aidr_el1", AIDR_EL1}, + {"dczid_el0", DCZID_EL0}, + {"id_pfr0_el1", ID_PFR0_EL1}, + {"id_pfr1_el1", ID_PFR1_EL1}, + {"id_dfr0_el1", ID_DFR0_EL1}, + {"id_afr0_el1", ID_AFR0_EL1}, + {"id_mmfr0_el1", ID_MMFR0_EL1}, + {"id_mmfr1_el1", ID_MMFR1_EL1}, + {"id_mmfr2_el1", ID_MMFR2_EL1}, + {"id_mmfr3_el1", ID_MMFR3_EL1}, + {"id_isar0_el1", ID_ISAR0_EL1}, + {"id_isar1_el1", ID_ISAR1_EL1}, + {"id_isar2_el1", ID_ISAR2_EL1}, + {"id_isar3_el1", ID_ISAR3_EL1}, + {"id_isar4_el1", ID_ISAR4_EL1}, + {"id_isar5_el1", ID_ISAR5_EL1}, + {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, + {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, + {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, + {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, + {"mvfr0_el1", MVFR0_EL1}, + {"mvfr1_el1", MVFR1_EL1}, + {"mvfr2_el1", MVFR2_EL1}, + {"rvbar_el1", RVBAR_EL1}, + {"rvbar_el2", RVBAR_EL2}, + {"rvbar_el3", RVBAR_EL3}, + {"isr_el1", ISR_EL1}, + {"cntpct_el0", CNTPCT_EL0}, + {"cntvct_el0", CNTVCT_EL0} +}; + +A64SysReg::MRSMapper::MRSMapper() { + InstPairs = &MRSPairs[0]; + NumInstPairs = llvm::array_lengthof(MRSPairs); +} + +const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { + {"dbgdtrtx_el0", DBGDTRTX_EL0}, + {"oslar_el1", OSLAR_EL1}, + {"pmswinc_el0", PMSWINC_EL0} +}; + +A64SysReg::MSRMapper::MSRMapper() { + InstPairs = &MSRPairs[0]; + NumInstPairs = llvm::array_lengthof(MSRPairs); +} + + +const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { + {"osdtrrx_el1", OSDTRRX_EL1}, + {"osdtrtx_el1", OSDTRTX_EL1}, + {"teecr32_el1", 
TEECR32_EL1}, + {"mdccint_el1", MDCCINT_EL1}, + {"mdscr_el1", MDSCR_EL1}, + {"dbgdtr_el0", DBGDTR_EL0}, + {"oseccr_el1", OSECCR_EL1}, + {"dbgvcr32_el2", DBGVCR32_EL2}, + {"dbgbvr0_el1", DBGBVR0_EL1}, + {"dbgbvr1_el1", DBGBVR1_EL1}, + {"dbgbvr2_el1", DBGBVR2_EL1}, + {"dbgbvr3_el1", DBGBVR3_EL1}, + {"dbgbvr4_el1", DBGBVR4_EL1}, + {"dbgbvr5_el1", DBGBVR5_EL1}, + {"dbgbvr6_el1", DBGBVR6_EL1}, + {"dbgbvr7_el1", DBGBVR7_EL1}, + {"dbgbvr8_el1", DBGBVR8_EL1}, + {"dbgbvr9_el1", DBGBVR9_EL1}, + {"dbgbvr10_el1", DBGBVR10_EL1}, + {"dbgbvr11_el1", DBGBVR11_EL1}, + {"dbgbvr12_el1", DBGBVR12_EL1}, + {"dbgbvr13_el1", DBGBVR13_EL1}, + {"dbgbvr14_el1", DBGBVR14_EL1}, + {"dbgbvr15_el1", DBGBVR15_EL1}, + {"dbgbcr0_el1", DBGBCR0_EL1}, + {"dbgbcr1_el1", DBGBCR1_EL1}, + {"dbgbcr2_el1", DBGBCR2_EL1}, + {"dbgbcr3_el1", DBGBCR3_EL1}, + {"dbgbcr4_el1", DBGBCR4_EL1}, + {"dbgbcr5_el1", DBGBCR5_EL1}, + {"dbgbcr6_el1", DBGBCR6_EL1}, + {"dbgbcr7_el1", DBGBCR7_EL1}, + {"dbgbcr8_el1", DBGBCR8_EL1}, + {"dbgbcr9_el1", DBGBCR9_EL1}, + {"dbgbcr10_el1", DBGBCR10_EL1}, + {"dbgbcr11_el1", DBGBCR11_EL1}, + {"dbgbcr12_el1", DBGBCR12_EL1}, + {"dbgbcr13_el1", DBGBCR13_EL1}, + {"dbgbcr14_el1", DBGBCR14_EL1}, + {"dbgbcr15_el1", DBGBCR15_EL1}, + {"dbgwvr0_el1", DBGWVR0_EL1}, + {"dbgwvr1_el1", DBGWVR1_EL1}, + {"dbgwvr2_el1", DBGWVR2_EL1}, + {"dbgwvr3_el1", DBGWVR3_EL1}, + {"dbgwvr4_el1", DBGWVR4_EL1}, + {"dbgwvr5_el1", DBGWVR5_EL1}, + {"dbgwvr6_el1", DBGWVR6_EL1}, + {"dbgwvr7_el1", DBGWVR7_EL1}, + {"dbgwvr8_el1", DBGWVR8_EL1}, + {"dbgwvr9_el1", DBGWVR9_EL1}, + {"dbgwvr10_el1", DBGWVR10_EL1}, + {"dbgwvr11_el1", DBGWVR11_EL1}, + {"dbgwvr12_el1", DBGWVR12_EL1}, + {"dbgwvr13_el1", DBGWVR13_EL1}, + {"dbgwvr14_el1", DBGWVR14_EL1}, + {"dbgwvr15_el1", DBGWVR15_EL1}, + {"dbgwcr0_el1", DBGWCR0_EL1}, + {"dbgwcr1_el1", DBGWCR1_EL1}, + {"dbgwcr2_el1", DBGWCR2_EL1}, + {"dbgwcr3_el1", DBGWCR3_EL1}, + {"dbgwcr4_el1", DBGWCR4_EL1}, + {"dbgwcr5_el1", DBGWCR5_EL1}, + {"dbgwcr6_el1", DBGWCR6_EL1}, + {"dbgwcr7_el1", DBGWCR7_EL1}, + {"dbgwcr8_el1", DBGWCR8_EL1}, + {"dbgwcr9_el1", DBGWCR9_EL1}, + {"dbgwcr10_el1", DBGWCR10_EL1}, + {"dbgwcr11_el1", DBGWCR11_EL1}, + {"dbgwcr12_el1", DBGWCR12_EL1}, + {"dbgwcr13_el1", DBGWCR13_EL1}, + {"dbgwcr14_el1", DBGWCR14_EL1}, + {"dbgwcr15_el1", DBGWCR15_EL1}, + {"teehbr32_el1", TEEHBR32_EL1}, + {"osdlr_el1", OSDLR_EL1}, + {"dbgprcr_el1", DBGPRCR_EL1}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, + {"csselr_el1", CSSELR_EL1}, + {"vpidr_el2", VPIDR_EL2}, + {"vmpidr_el2", VMPIDR_EL2}, + {"sctlr_el1", SCTLR_EL1}, + {"sctlr_el2", SCTLR_EL2}, + {"sctlr_el3", SCTLR_EL3}, + {"actlr_el1", ACTLR_EL1}, + {"actlr_el2", ACTLR_EL2}, + {"actlr_el3", ACTLR_EL3}, + {"cpacr_el1", CPACR_EL1}, + {"hcr_el2", HCR_EL2}, + {"scr_el3", SCR_EL3}, + {"mdcr_el2", MDCR_EL2}, + {"sder32_el3", SDER32_EL3}, + {"cptr_el2", CPTR_EL2}, + {"cptr_el3", CPTR_EL3}, + {"hstr_el2", HSTR_EL2}, + {"hacr_el2", HACR_EL2}, + {"mdcr_el3", MDCR_EL3}, + {"ttbr0_el1", TTBR0_EL1}, + {"ttbr0_el2", TTBR0_EL2}, + {"ttbr0_el3", TTBR0_EL3}, + {"ttbr1_el1", TTBR1_EL1}, + {"tcr_el1", TCR_EL1}, + {"tcr_el2", TCR_EL2}, + {"tcr_el3", TCR_EL3}, + {"vttbr_el2", VTTBR_EL2}, + {"vtcr_el2", VTCR_EL2}, + {"dacr32_el2", DACR32_EL2}, + {"spsr_el1", SPSR_EL1}, + {"spsr_el2", SPSR_EL2}, + {"spsr_el3", SPSR_EL3}, + {"elr_el1", ELR_EL1}, + {"elr_el2", ELR_EL2}, + {"elr_el3", ELR_EL3}, + {"sp_el0", SP_EL0}, + {"sp_el1", SP_EL1}, + {"sp_el2", SP_EL2}, + {"spsel", SPSel}, + {"nzcv", NZCV}, + {"daif", DAIF}, + {"currentel", CurrentEL}, + {"spsr_irq", SPSR_irq}, + 
{"spsr_abt", SPSR_abt}, + {"spsr_und", SPSR_und}, + {"spsr_fiq", SPSR_fiq}, + {"fpcr", FPCR}, + {"fpsr", FPSR}, + {"dspsr_el0", DSPSR_EL0}, + {"dlr_el0", DLR_EL0}, + {"ifsr32_el2", IFSR32_EL2}, + {"afsr0_el1", AFSR0_EL1}, + {"afsr0_el2", AFSR0_EL2}, + {"afsr0_el3", AFSR0_EL3}, + {"afsr1_el1", AFSR1_EL1}, + {"afsr1_el2", AFSR1_EL2}, + {"afsr1_el3", AFSR1_EL3}, + {"esr_el1", ESR_EL1}, + {"esr_el2", ESR_EL2}, + {"esr_el3", ESR_EL3}, + {"fpexc32_el2", FPEXC32_EL2}, + {"far_el1", FAR_EL1}, + {"far_el2", FAR_EL2}, + {"far_el3", FAR_EL3}, + {"hpfar_el2", HPFAR_EL2}, + {"par_el1", PAR_EL1}, + {"pmcr_el0", PMCR_EL0}, + {"pmcntenset_el0", PMCNTENSET_EL0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0}, + {"pmovsclr_el0", PMOVSCLR_EL0}, + {"pmselr_el0", PMSELR_EL0}, + {"pmccntr_el0", PMCCNTR_EL0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0}, + {"pmuserenr_el0", PMUSERENR_EL0}, + {"pmintenset_el1", PMINTENSET_EL1}, + {"pmintenclr_el1", PMINTENCLR_EL1}, + {"pmovsset_el0", PMOVSSET_EL0}, + {"mair_el1", MAIR_EL1}, + {"mair_el2", MAIR_EL2}, + {"mair_el3", MAIR_EL3}, + {"amair_el1", AMAIR_EL1}, + {"amair_el2", AMAIR_EL2}, + {"amair_el3", AMAIR_EL3}, + {"vbar_el1", VBAR_EL1}, + {"vbar_el2", VBAR_EL2}, + {"vbar_el3", VBAR_EL3}, + {"rmr_el1", RMR_EL1}, + {"rmr_el2", RMR_EL2}, + {"rmr_el3", RMR_EL3}, + {"contextidr_el1", CONTEXTIDR_EL1}, + {"tpidr_el0", TPIDR_EL0}, + {"tpidr_el2", TPIDR_EL2}, + {"tpidr_el3", TPIDR_EL3}, + {"tpidrro_el0", TPIDRRO_EL0}, + {"tpidr_el1", TPIDR_EL1}, + {"cntfrq_el0", CNTFRQ_EL0}, + {"cntvoff_el2", CNTVOFF_EL2}, + {"cntkctl_el1", CNTKCTL_EL1}, + {"cnthctl_el2", CNTHCTL_EL2}, + {"cntp_tval_el0", CNTP_TVAL_EL0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2}, + {"cntps_tval_el1", CNTPS_TVAL_EL1}, + {"cntp_ctl_el0", CNTP_CTL_EL0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2}, + {"cntps_ctl_el1", CNTPS_CTL_EL1}, + {"cntp_cval_el0", CNTP_CVAL_EL0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2}, + {"cntps_cval_el1", CNTPS_CVAL_EL1}, + {"cntv_tval_el0", CNTV_TVAL_EL0}, + {"cntv_ctl_el0", CNTV_CTL_EL0}, + {"cntv_cval_el0", CNTV_CVAL_EL0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0}, + {"pmccfiltr_el0", PMCCFILTR_EL0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0}, + {"pmevtyper3_el0", PMEVTYPER3_EL0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0}, + {"pmevtyper7_el0", 
PMEVTYPER7_EL0},
+  {"pmevtyper8_el0", PMEVTYPER8_EL0},
+  {"pmevtyper9_el0", PMEVTYPER9_EL0},
+  {"pmevtyper10_el0", PMEVTYPER10_EL0},
+  {"pmevtyper11_el0", PMEVTYPER11_EL0},
+  {"pmevtyper12_el0", PMEVTYPER12_EL0},
+  {"pmevtyper13_el0", PMEVTYPER13_EL0},
+  {"pmevtyper14_el0", PMEVTYPER14_EL0},
+  {"pmevtyper15_el0", PMEVTYPER15_EL0},
+  {"pmevtyper16_el0", PMEVTYPER16_EL0},
+  {"pmevtyper17_el0", PMEVTYPER17_EL0},
+  {"pmevtyper18_el0", PMEVTYPER18_EL0},
+  {"pmevtyper19_el0", PMEVTYPER19_EL0},
+  {"pmevtyper20_el0", PMEVTYPER20_EL0},
+  {"pmevtyper21_el0", PMEVTYPER21_EL0},
+  {"pmevtyper22_el0", PMEVTYPER22_EL0},
+  {"pmevtyper23_el0", PMEVTYPER23_EL0},
+  {"pmevtyper24_el0", PMEVTYPER24_EL0},
+  {"pmevtyper25_el0", PMEVTYPER25_EL0},
+  {"pmevtyper26_el0", PMEVTYPER26_EL0},
+  {"pmevtyper27_el0", PMEVTYPER27_EL0},
+  {"pmevtyper28_el0", PMEVTYPER28_EL0},
+  {"pmevtyper29_el0", PMEVTYPER29_EL0},
+  {"pmevtyper30_el0", PMEVTYPER30_EL0},
+};
+
+uint32_t
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+  // First search the registers shared by all
+  std::string NameLower = Name.lower();
+  for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+    if (SysRegPairs[i].Name == NameLower) {
+      Valid = true;
+      return SysRegPairs[i].Value;
+    }
+  }
+
+  // Now try the instruction-specific registers (either read-only or
+  // write-only).
+  for (unsigned i = 0; i < NumInstPairs; ++i) {
+    if (InstPairs[i].Name == NameLower) {
+      Valid = true;
+      return InstPairs[i].Value;
+    }
+  }
+
+  // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the
+  // bits are: 11 xxx 1x11 xxxx xxx
+  Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+  SmallVector<StringRef, 5> Ops;
+  if (!GenericRegPattern.match(NameLower, &Ops)) {
+    Valid = false;
+    return -1;
+  }
+
+  uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+  uint32_t Bits;
+  Ops[1].getAsInteger(10, Op1);
+  Ops[2].getAsInteger(10, CRn);
+  Ops[3].getAsInteger(10, CRm);
+  Ops[4].getAsInteger(10, Op2);
+  Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+  Valid = true;
+  return Bits;
+}
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+  for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+    if (SysRegPairs[i].Value == Bits) {
+      Valid = true;
+      return SysRegPairs[i].Name;
+    }
+  }
+
+  for (unsigned i = 0; i < NumInstPairs; ++i) {
+    if (InstPairs[i].Value == Bits) {
+      Valid = true;
+      return InstPairs[i].Name;
+    }
+  }
+
+  uint32_t Op0 = (Bits >> 14) & 0x3;
+  uint32_t Op1 = (Bits >> 11) & 0x7;
+  uint32_t CRn = (Bits >> 7) & 0xf;
+  uint32_t CRm = (Bits >> 3) & 0xf;
+  uint32_t Op2 = Bits & 0x7;
+
+  // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+  // name.
+  if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+    Valid = false;
+    return "";
+  }
+
+  assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+  Valid = true;
+  return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+         + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+  {"ipas2e1is", IPAS2E1IS},
+  {"ipas2le1is", IPAS2LE1IS},
+  {"vmalle1is", VMALLE1IS},
+  {"alle2is", ALLE2IS},
+  {"alle3is", ALLE3IS},
+  {"vae1is", VAE1IS},
+  {"vae2is", VAE2IS},
+  {"vae3is", VAE3IS},
+  {"aside1is", ASIDE1IS},
+  {"vaae1is", VAAE1IS},
+  {"alle1is", ALLE1IS},
+  {"vale1is", VALE1IS},
+  {"vale2is", VALE2IS},
+  {"vale3is", VALE3IS},
+  {"vmalls12e1is", VMALLS12E1IS},
+  {"vaale1is", VAALE1IS},
+  {"ipas2e1", IPAS2E1},
+  {"ipas2le1", IPAS2LE1},
+  {"vmalle1", VMALLE1},
+  {"alle2", ALLE2},
+  {"alle3", ALLE3},
+  {"vae1", VAE1},
+  {"vae2", VAE2},
+  {"vae3", VAE3},
+  {"aside1", ASIDE1},
+  {"vaae1", VAAE1},
+  {"alle1", ALLE1},
+  {"vale1", VALE1},
+  {"vale2", VALE2},
+  {"vale3", VALE3},
+  {"vmalls12e1", VMALLS12E1},
+  {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+  : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+  const fltSemantics &Sem = Val.getSemantics();
+  unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+  uint32_t ExpMask;
+  switch (FracBits) {
+  case 10: // IEEE half-precision
+    ExpMask = 0x1f;
+    break;
+  case 23: // IEEE single-precision
+    ExpMask = 0xff;
+    break;
+  case 52: // IEEE double-precision
+    ExpMask = 0x7ff;
+    break;
+  case 112: // IEEE quad-precision
+    // No immediates are valid for quad precision.
+    return false;
+  default:
+    llvm_unreachable("Only half, single and double precision supported");
+  }
+
+  uint32_t ExpStart = FracBits;
+  uint64_t FracMask = (1ULL << FracBits) - 1;
+
+  uint32_t Sign = Val.isNegative();
+
+  uint64_t Bits = Val.bitcastToAPInt().getLimitedValue();
+  uint64_t Fraction = Bits & FracMask;
+  int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
+  Exponent -= ExpMask >> 1;
+
+  // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
+  // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
+  // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
+  uint64_t A64FracStart = FracBits - 4;
+  uint64_t A64FracMask = 0xf;
+
+  // Are there too many fraction bits?
+  if (Fraction & ~(A64FracMask << A64FracStart))
+    return false;
+
+  if (Exponent < -3 || Exponent > 4)
+    return false;
+
+  uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
+  uint32_t PackedExp = (Exponent + 7) & 0x7;
+
+  Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
+  return true;
+}
+
+// Encoding of the immediate for logical (immediate) instructions:
+//
+// | N | imms   | immr   | size | R            | S            |
+// |---+--------+--------+------+--------------+--------------|
+// | 1 | ssssss | rrrrrr |  64  | UInt(rrrrrr) | UInt(ssssss) |
+// | 0 | 0sssss | xrrrrr |  32  | UInt(rrrrr)  | UInt(sssss)  |
+// | 0 | 10ssss | xxrrrr |  16  | UInt(rrrr)   | UInt(ssss)   |
+// | 0 | 110sss | xxxrrr |   8  | UInt(rrr)    | UInt(sss)    |
+// | 0 | 1110ss | xxxxrr |   4  | UInt(rr)     | UInt(ss)     |
+// | 0 | 11110s | xxxxxr |   2  | UInt(r)      | UInt(s)      |
+// | 0 | 11111x | -      |      | UNALLOCATED  |              |
+//
+// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
+// which the lower S+1 bits are ones and the remaining bits are zero, then
+// rotated right by R bits and replicated across the datapath.
+// +// + Values of 'N', 'imms' and 'immr' which do not match the above table are +// RESERVED. +// + If all 's' bits in the imms field are set then the instruction is +// RESERVED. +// + The 'x' bits in the 'immr' field are IGNORED. + +bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { + int RepeatWidth; + int Rotation = 0; + int Num1s = 0; + + // Because there are S+1 ones in the replicated mask, an immediate of all + // zeros is not allowed. Filtering it here is probably more efficient. + if (Imm == 0) return false; + + for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { + uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; + uint64_t ReplicatedMask = Imm & RepeatMask; + + if (ReplicatedMask == 0) continue; + + // First we have to make sure the mask is actually repeated in each slot for + // this width-specifier. + bool IsReplicatedMask = true; + for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { + if (((Imm >> i) & RepeatMask) != ReplicatedMask) { + IsReplicatedMask = false; + break; + } + } + if (!IsReplicatedMask) continue; + + // Now we have to work out the amount of rotation needed. The first part of + // this calculation is actually independent of RepeatWidth, but the complex + // case will depend on it. + Rotation = CountTrailingZeros_64(Imm); + if (Rotation == 0) { + // There were no leading zeros, which means it's either in place or there + // are 1s at each end (e.g. 0x8003 needs rotating). + Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm) + : CountLeadingOnes_32(Imm); + Rotation = RepeatWidth - Rotation; + } + + uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: + if (!isMask_64(ReplicatedOnes)) + continue; + + Num1s = CountTrailingOnes_64(ReplicatedOnes); + + // We know we've got an almost valid encoding (certainly, if this is invalid + // no other parameters would work). + break; + } + + // The encodings which would produce all 1s are RESERVED. + if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; + + uint32_t N = RepeatWidth == 64; + uint32_t ImmR = RepeatWidth - Rotation; + uint32_t ImmS = Num1s - 1; + + switch (RepeatWidth) { + default: break; // No action required for other valid rotations. + case 16: ImmS |= 0x20; break; // 10ssss + case 8: ImmS |= 0x30; break; // 110sss + case 4: ImmS |= 0x38; break; // 1110ss + case 2: ImmS |= 0x3c; break; // 11110s + } + + Bits = ImmS | (ImmR << 6) | (N << 12); + + return true; +} + + +bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) { + uint32_t N = Bits >> 12; + uint32_t ImmR = (Bits >> 6) & 0x3f; + uint32_t ImmS = Bits & 0x3f; + + // N=1 encodes a 64-bit replication and is invalid for the 32-bit + // instructions. + if (RegWidth == 32 && N != 0) return false; + + int Width = 0; + if (N == 1) + Width = 64; + else if ((ImmS & 0x20) == 0) + Width = 32; + else if ((ImmS & 0x10) == 0) + Width = 16; + else if ((ImmS & 0x08) == 0) + Width = 8; + else if ((ImmS & 0x04) == 0) + Width = 4; + else if ((ImmS & 0x02) == 0) + Width = 2; + else { + // ImmS is 0b11111x: UNALLOCATED + return false; + } + + int Num1s = (ImmS & (Width - 1)) + 1; + + // All encodings which would map to -1 (signed) are RESERVED. + if (Num1s == Width) return false; + + int Rotation = (ImmR & (Width - 1)); + uint64_t Mask = (1ULL << Num1s) - 1; + uint64_t WidthMask = Width == 64 ? 
-1 : (1ULL << Width) - 1; + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); + + Imm = 0; + for (unsigned i = 0; i < RegWidth / Width; ++i) { + Imm |= Mask; + Mask <<= Width; + } + + return true; +} + +bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // If high bits are set then a 32-bit MOVZ can't possibly work. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + for (int i = 0; i < RegWidth; i += 16) { + // If the value is 0 when we mask out all the bits that could be set with + // the current LSL value then it's representable. + if ((Value & ~(0xffffULL << i)) == 0) { + Shift = i / 16; + UImm16 = (Value >> i) & 0xffff; + return true; + } + } + return false; +} + +bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // MOVN is defined to set its register to NOT(LSL(imm16, shift)). + + // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* + // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not + // a valid input for isMOVZImm. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value; + + return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); +} + +bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, + int &UImm16, int &Shift) { + if (isMOVZImm(RegWidth, Value, UImm16, Shift)) + return false; + + return isMOVNImm(RegWidth, Value, UImm16, Shift); +} + +MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitAArch64MCSubtargetInfo(X, TT, CPU, ""); + return X; +} + + +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAArch64MCRegisterInfo(X, AArch64::X30); + return X; +} + +static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) { + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
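  // In effect, the mapping applied below is:
  //   Default      -> Static
  //   DynamicNoPIC -> Static
  //   Static, PIC_ -> left as requested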
+ RM = Reloc::Static; + } + + if (CM == CodeModel::Default) + CM = CodeModel::Small; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + + +static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new AArch64InstPrinter(MAI, MII, MRI, STI); + return 0; +} + +namespace { + +class AArch64MCInstrAnalysis : public MCInstrAnalysis { +public: + AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + virtual bool isUnconditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return true; + return MCInstrAnalysis::isUnconditionalBranch(Inst); + } + + virtual bool isConditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return false; + return MCInstrAnalysis::isConditionalBranch(Inst); + } + + uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; + // FIXME: We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType + != MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(LblOperand).getImm(); + + return Addr + Imm; + } +}; + +} + +static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { + return new AArch64MCInstrAnalysis(Info); +} + + + +extern "C" void LLVMInitializeAArch64TargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target, + createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAArch64Target, + createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAArch64Target, + createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + using AArch64_MC::createAArch64MCSubtargetInfo; + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target, + createAArch64MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target, + createAArch64MCInstrAnalysis); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target, + createAArch64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheAArch64Target, + createAArch64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target, + createMCStreamer); + + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target, + createAArch64MCInstPrinter); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h new file mode 100644 index 0000000..3849fe3 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -0,0 +1,65 @@ +//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCTARGETDESC_H +#define LLVM_AARCH64MCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class raw_ostream; + +extern Target TheAArch64Target; + +namespace AArch64_MC { + MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + +MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT, + StringRef CPU); + +} // End llvm namespace + +// Defines symbolic names for AArch64 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "AArch64GenRegisterInfo.inc" + +// Defines symbolic names for the AArch64 instructions. +// +#define GET_INSTRINFO_ENUM +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AArch64GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..44c66a2 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_library(LLVMAArch64Desc + AArch64AsmBackend.cpp + AArch64ELFObjectWriter.cpp + AArch64ELFStreamer.cpp + AArch64MCAsmInfo.cpp + AArch64MCCodeEmitter.cpp + AArch64MCExpr.cpp + AArch64MCTargetDesc.cpp + ) +add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..5a2f467 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Desc +parent = AArch64 +required_libraries = AArch64AsmPrinter MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile new file mode 100644 index 0000000..5779ac5 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile new file mode 100644 index 0000000..b2ca278 --- /dev/null +++ b/lib/Target/AArch64/Makefile @@ -0,0 +1,30 @@ +##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAArch64CodeGen +TARGET = AArch64 + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = AArch64GenAsmMatcher.inc \ + AArch64GenAsmWriter.inc \ + AArch64GenCallingConv.inc \ + AArch64GenDAGISel.inc \ + AArch64GenDisassemblerTables.inc \ + AArch64GenInstrInfo.inc \ + AArch64GenMCCodeEmitter.inc \ + AArch64GenMCPseudoLowering.inc \ + AArch64GenRegisterInfo.inc \ + AArch64GenSubtargetInfo.inc + +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common + + diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt new file mode 100644 index 0000000..601990f --- /dev/null +++ b/lib/Target/AArch64/README.txt @@ -0,0 +1,2 @@ +This file will contain changes that need to be made before AArch64 can become an +officially supported target. Currently a placeholder. diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp new file mode 100644 index 0000000..fa07d49 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheAArch64Target;
+
+extern "C" void LLVMInitializeAArch64TargetInfo() {
+  RegisterTarget<Triple::aarch64>
+    X(TheAArch64Target, "aarch64", "AArch64");
+}
diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..e236eed
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Info
+  AArch64TargetInfo.cpp
+  )
+
+add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..5b003f0
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Info
+parent = AArch64
+required_libraries = MC Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile
new file mode 100644
index 0000000..9dc9aa4
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index b404e6c..cd4067a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -64,6 +64,9 @@ public:
     return getSubExpr()->FindAssociatedSection();
   }
 
+  // There are no TLS ARMMCExprs at the moment.
+  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
   static bool classof(const MCExpr *E) {
     return E->getKind() == MCExpr::Target;
   }
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index f3a9c1c..c06e8bc 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
+subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
 
 ; This is a special group whose required libraries are extended (by llvm-build)
 ; with the best execution engine (the native JIT, if available, or the
-- 
cgit v1.1

From b11917c1aa7348a67d80149fa9613f09a8d56f14 Mon Sep 17 00:00:00 2001
From: Derek Schuff
Date: Thu, 31 Jan 2013 17:00:03 +0000
Subject: [MC] bundle alignment: prevent padding instructions from crossing
 bundle boundaries

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174067 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCAssembler.cpp | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 983de1a..498fbf7 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -467,7 +467,7 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
   //
   //
   //        BundlePadding
-  //             ||| 
+  //             |||
   // -------------------------------------
   //   Prev |##########|       F        |
   // -------------------------------------
@@ -506,6 +506,9 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
                           const MCFragment &F) {
   MCObjectWriter *OW = &Asm.getWriter();
 
+  // FIXME: Embed in fragments instead?
+  uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
+
   // Should NOP padding be written out before this fragment?
   unsigned BundlePadding = F.getBundlePadding();
   if (BundlePadding > 0) {
@@ -514,6 +517,22 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
     assert(F.hasInstructions() &&
            "Writing bundle padding for a fragment without instructions");
 
+    unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize);
+    if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) {
+      // If the padding itself crosses a bundle boundary, it must be emitted
+      // in 2 pieces, since even nop instructions must not cross boundaries.
+      //             v--------------v   <- BundleAlignSize
+      //                v---------v     <- BundlePadding
+      // ----------------------------
+      // | Prev |####|####|    F    |
+      // ----------------------------
+      //        ^-------------------^   <- TotalLength
+      unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize();
+      if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW))
+        report_fatal_error("unable to write NOP sequence of " +
+                           Twine(DistanceToBoundary) + " bytes");
+      BundlePadding -= DistanceToBoundary;
+    }
    if (!Asm.getBackend().writeNopData(BundlePadding, OW))
      report_fatal_error("unable to write NOP sequence of " +
                         Twine(BundlePadding) + " bytes");
@@ -526,8 +545,6 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
 
   ++stats::EmittedFragments;
 
-  // FIXME: Embed in fragments instead?
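  // Illustrative numbers for the padding split above (editorial example, not
  // part of the patch): with getBundleAlignSize() == 16, FragmentSize == 12
  // and BundlePadding == 10, TotalLength = 10 + 12 = 22 > 16, so the padding
  // crosses a bundle boundary. DistanceToBoundary = 22 - 16 = 6: six NOP
  // bytes are written up to the boundary, the remaining four padding bytes
  // follow, and the 12-byte fragment then ends exactly on the next boundary
  // (4 + 12 == 16).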
- uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); switch (F.getKind()) { case MCFragment::FT_Align: { ++stats::EmittedAlignFragments; @@ -1134,4 +1151,3 @@ void MCOrgFragment::anchor() { } void MCLEBFragment::anchor() { } void MCDwarfLineAddrFragment::anchor() { } void MCDwarfCallFrameFragment::anchor() { } - -- cgit v1.1 From 6f6f17197259edbb82cc9b0d800fe0e3cb8e201c Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 31 Jan 2013 19:46:28 +0000 Subject: RuntimeDyld: Fix errant fallthrough. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174078 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index d2310b5..bcc3df1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -96,6 +96,7 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress, *p++ = (uint8_t)(ValueToWrite & 0xff); ValueToWrite >>= 8; } + return false; } case macho::RIT_Difference: case macho::RIT_Generic_LocalDifference: -- cgit v1.1 From 133f6b8582f12ab4fdaee3b870d4bcb12d61ca9b Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 31 Jan 2013 19:46:57 +0000 Subject: Object: Fix errant fallthrough. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174079 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 0ad8893..a853618 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1076,6 +1076,7 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, printRelocationTargetName(RENext, fmt); fmt << "-"; printRelocationTargetName(RE, fmt); + break; } case macho::RIT_X86_64_TLV: printRelocationTargetName(RE, fmt); -- cgit v1.1 From cce07c9b3162acc756535e813e6ae818ad61564b Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 31 Jan 2013 19:46:59 +0000 Subject: interpreter: Fix errant fallthrough. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174080 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/Interpreter/Execution.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 431744a..b9a3fc7 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1169,10 +1169,12 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { .VarArgs[VAList.UIntPairVal.second]; Type *Ty = I.getType(); switch (Ty->getTypeID()) { - case Type::IntegerTyID: Dest.IntVal = Src.IntVal; + case Type::IntegerTyID: + Dest.IntVal = Src.IntVal; IMPLEMENT_VAARG(Pointer); IMPLEMENT_VAARG(Float); IMPLEMENT_VAARG(Double); + break; default: dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; llvm_unreachable(0); -- cgit v1.1 From 108fb3202af6f500073cdbb7be32c25d7a273a2e Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 31 Jan 2013 20:02:54 +0000 Subject: [PEI] Pass the frame index operand number to the eliminateFrameIndex function. Each target implementation was needlessly recomputing the index. 
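In outline, the caller now finds the frame-index operand once and passes its
index down; a condensed sketch (not verbatim code from this patch; the real
changes are in the diffs below):

    // Caller side (PEI / register scavenger): locate the operand once.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
      if (MI->getOperand(i).isFI()) {
        TRI.eliminateFrameIndex(MI, SPAdj, /*FIOperandNum=*/i, RS);
        break;
      }

    // Target side: the old "while (!MI.getOperand(i).isFI()) ++i;" rescan
    // disappears, and the passed-in index is used directly.
    int FrameIndex = MI.getOperand(FIOperandNum).getIndex();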
Part of rdar://13076458 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174083 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PrologEpilogInserter.cpp | 2 +- lib/CodeGen/RegisterScavenging.cpp | 18 +++++++++-- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 24 ++++++-------- lib/Target/ARM/ARMBaseRegisterInfo.h | 3 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 27 +++++++--------- lib/Target/ARM/Thumb1RegisterInfo.h | 3 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 50 ++++++++++++++---------------- lib/Target/Hexagon/HexagonRegisterInfo.h | 3 +- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 20 ++++-------- lib/Target/MBlaze/MBlazeRegisterInfo.h | 3 +- lib/Target/MSP430/MSP430RegisterInfo.cpp | 18 +++++------ lib/Target/MSP430/MSP430RegisterInfo.h | 3 +- lib/Target/Mips/MipsRegisterInfo.cpp | 13 ++------ lib/Target/Mips/MipsRegisterInfo.h | 3 +- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 18 +++-------- lib/Target/NVPTX/NVPTXRegisterInfo.h | 2 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 20 +++++------- lib/Target/PowerPC/PPCRegisterInfo.h | 3 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 21 +++++-------- lib/Target/Sparc/SparcRegisterInfo.h | 3 +- lib/Target/X86/X86RegisterInfo.cpp | 25 ++++++--------- lib/Target/X86/X86RegisterInfo.h | 3 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 20 +++++------- lib/Target/XCore/XCoreRegisterInfo.h | 3 +- 24 files changed, 135 insertions(+), 173 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 1d0e71e..87a6528 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -766,7 +766,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - TRI.eliminateFrameIndex(MI, SPAdj, + TRI.eliminateFrameIndex(MI, SPAdj, i, FrameIndexVirtualScavenging ? NULL : RS); // Reset the iterator if we were at the beginning of the BB. diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 88f67da..6da901f 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -316,6 +316,16 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, return Survivor; } +static unsigned getFrameIndexOperandNum(MachineInstr *MI) { + unsigned i = 0; + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + return i; +} + unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { @@ -364,12 +374,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + + unsigned FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). 
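   // (Editorial note, not a line of this patch: the reload emitted below,
   // like the spill above, carries a frame-index operand of its own, so it
   // too is passed through eliminateFrameIndex with its operand number.)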
   TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
   II = prior(UseMI);
-  TRI->eliminateFrameIndex(II, SPAdj, this);
+
+  FIOperandNum = getFrameIndexOperandNum(II);
+  TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
 }
 
 ScavengeRestore = prior(UseMI);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d2f6a33..0deafae 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -717,8 +717,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
 
 void
 ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                         int SPAdj, RegScavenger *RS) const {
-  unsigned i = 0;
+                                         int SPAdj, unsigned FIOperandNum,
+                                         RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
@@ -727,13 +727,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   assert(!AFI->isThumb1OnlyFunction() &&
          "This eliminateFrameIndex does not support Thumb1!");
-
-  while (!MI.getOperand(i).isFI()) {
-    ++i;
-    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
-  }
-
-  int FrameIndex = MI.getOperand(i).getIndex();
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   unsigned FrameReg;
 
   int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
@@ -755,18 +749,18 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Special handling of dbg_value instructions.
   if (MI.isDebugValue()) {
-    MI.getOperand(i).  ChangeToRegister(FrameReg, false /*isDef*/);
-    MI.getOperand(i+1).ChangeToImmediate(Offset);
+    MI.getOperand(FIOperandNum).  ChangeToRegister(FrameReg, false /*isDef*/);
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
     return;
   }
 
   // Modify MI as necessary to handle as much of 'Offset' as possible
   bool Done = false;
   if (!AFI->isThumbFunction())
-    Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+    Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
   else {
     assert(AFI->isThumb2Function());
-    Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+    Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
   }
   if (Done)
     return;
@@ -786,7 +780,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
   if (Offset == 0)
     // Must be addrmode4/6.
-    MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
   else {
     ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
     if (!AFI->isThumbFunction())
@@ -798,6 +792,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                              Offset, Pred, PredReg, TII);
   }
   // Update the original instruction to use the scratch register.
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index aaa56a9..7fab9ff 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -173,7 +173,8 @@ public: MachineBasicBlock::iterator I) const; virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 57cc7d8..5442aa7 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -593,9 +593,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { unsigned VReg = 0; - unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -603,13 +603,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); MachineInstrBuilder MIB(*MBB.getParent(), &MI); - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MF.getFrameInfo()->getStackSize() + SPAdj; @@ -646,15 +641,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of dbg_value instructions. if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); return; } // Modify MI as necessary to handle as much of 'Offset' as possible assert(AFI->isThumbFunction() && "This eliminateFrameIndex only supports Thumb1!"); - if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII)) + if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; // If we get here, the immediate doesn't fit into the instruction. We folded @@ -687,11 +682,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. - MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -708,11 +704,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, *this); MI.setDesc(TII.get(UseRR ? 
ARM::tSTRr : ARM::tSTRi)); - MI.getOperand(i).ChangeToRegister(VReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. - MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else { llvm_unreachable("Unexpected opcode!"); } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index f2e4b08..6232551 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -62,7 +62,8 @@ public: const TargetRegisterClass *RC, unsigned Reg) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; }; } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index d1882de..7929610 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -133,21 +133,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { // // Hexagon_TODO: Do we need to enforce this for Hexagon? assert(SPAdj == 0 && "Unexpected"); - - unsigned i = 0; MachineInstr &MI = *II; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Addressable stack objects are accessed using neg. offsets from %fp. MachineFunction &MF = *MI.getParent()->getParent(); @@ -167,8 +160,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) && !TII.isSpillPredRegOp(&MI)) { // Replace frame index with a stack pointer reference. - MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true); - MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false, + false, true); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset); } else { // Replace frame index with a frame pointer reference. 
if (!TII.isValidOffset(MI.getOpcode(), Offset)) { @@ -205,8 +199,8 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, dstReg).addReg(FrameReg).addImm(Offset); } - MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if ((MI.getOpcode() == Hexagon::STriw_indexed) || (MI.getOpcode() == Hexagon::STriw) || (MI.getOpcode() == Hexagon::STrid) || @@ -233,29 +227,31 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TII.get(Hexagon::ADD_ri), resReg).addReg(FrameReg).addImm(Offset); } - MI.getOperand(i).ChangeToRegister(resReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if (TII.isMemOp(&MI)) { unsigned resReg = HEXAGON_RESERVED_REG_1; if (!MFI.hasVarSizedObjects() && TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { - MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, - true); - MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), + false, false, true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::ADD_rr), resReg).addReg(FrameReg).addReg(resReg); - MI.getOperand(i).ChangeToRegister(resReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::ADD_ri), resReg).addReg(FrameReg).addImm(Offset); - MI.getOperand(i).ChangeToRegister(resReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } } else { unsigned dstReg = MI.getOperand(0).getReg(); @@ -265,14 +261,14 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TII.get(Hexagon::ADD_rr), dstReg).addReg(FrameReg).addReg(dstReg); // Can we delete MI??? r2 = add (r2, #0). - MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } } else { // If the offset is small enough to fit in the immediate field, directly // encode it. 
- MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); } } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index e8f3cfb..58c374e 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -61,7 +61,8 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { MachineBasicBlock::iterator I) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index ed06cc4..1682db1 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -126,24 +126,16 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // direct reference. void MBlazeRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const { + unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - - unsigned i = 0; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - unsigned oi = i == 2 ? 1 : 2; + unsigned OFIOperandNum = FIOperandNum == 2 ? 1 : 2; DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n"; dbgs() << "<--------->\n" << MI); - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int stackSize = MFI->getStackSize(); int spOffset = MFI->getObjectOffset(FrameIndex); @@ -159,12 +151,12 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // as explained on LowerFormalArguments, detect negative offsets // and adjust SPOffsets considering the final stack size. int Offset = (spOffset < 0) ? 
(stackSize - spOffset) : spOffset; - Offset += MI.getOperand(oi).getImm(); + Offset += MI.getOperand(OFIOperandNum).getImm(); DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n"); - MI.getOperand(oi).ChangeToImmediate(Offset); - MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + MI.getOperand(OFIOperandNum).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false); } void MBlazeRegisterInfo:: diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 1d51162..99d2e4b 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -56,7 +56,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 8f7813a..bb0f660 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -163,7 +163,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -172,12 +173,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr = (TFI->hasFP(MF) ? 
MSP430::FPW : MSP430::SPW); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); @@ -191,7 +187,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += 2; // Skip the saved FPW // Fold imm into offset - Offset += MI.getOperand(i+1).getImm(); + Offset += MI.getOperand(FIOperandNum + 1).getImm(); if (MI.getOpcode() == MSP430::ADD16ri) { // This is actually "load effective address" of the stack slot @@ -199,7 +195,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // expand it into mov + add MI.setDesc(TII.get(MSP430::MOV16rr)); - MI.getOperand(i).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); if (Offset == 0) return; @@ -216,8 +212,8 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - MI.getOperand(i).ChangeToRegister(BasePtr, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 64a43bc..fca903a 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -47,7 +47,8 @@ public: MachineBasicBlock::iterator I) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 6486e98..3250733 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -177,21 +177,14 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { // direct reference. 
void MipsRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const { + unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - unsigned i = 0; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; errs() << "<--------->\n" << MI); - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); uint64_t stackSize = MF.getFrameInfo()->getStackSize(); int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); @@ -199,7 +192,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - eliminateFI(MI, i, FrameIndex, stackSize, spOffset); + eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 032c2fd..13b2a6a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -55,7 +55,8 @@ public: /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 08be917..e0c9161 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -277,30 +277,22 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { void NVPTXRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, + int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(i+1).getImm(); + MI.getOperand(FIOperandNum+1).getImm(); // Using I0 as the frame pointer - MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); } - int NVPTXRegisterInfo:: getDwarfRegNum(unsigned RegNum, bool isEH) const { return 0; diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 5951783..a3e1252 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -55,7 +55,7 @@ public: virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, + int SPAdj, unsigned FIOperandNum, RegScavenger *RS=NULL) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 378c147..eca7f12 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ 
b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -510,7 +510,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -524,20 +525,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Find out which operand is the frame index. - unsigned FIOperandNo = 0; - while (!MI.getOperand(FIOperandNo).isFI()) { - ++FIOperandNo; - assert(FIOperandNo != MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNo-1; + OffsetOperandNo = FIOperandNum-1; // Get the frame index. - int FrameIndex = MI.getOperand(FIOperandNo).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Get the frame pointer save index. Users of this index are primarily // DYNALLOC instructions. @@ -567,7 +561,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). bool is64Bit = Subtarget.isPPC64(); - MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? + MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ? (is64Bit ? PPC::X31 : PPC::R31) : (is64Bit ? PPC::X1 : PPC::R1), false); @@ -649,7 +643,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); + unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index a8fd796..3e07a01 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -69,7 +69,8 @@ public: bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 9c1c30b..ac1a350 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -71,30 +71,25 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Addressable stack objects are accessed using neg. 
offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(i+1).getImm(); + MI.getOperand(FIOperandNum + 1).getImm(); // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { // If the offset is small enough to fit in the immediate field, directly // encode it. - MI.getOperand(i).ChangeToRegister(SP::I6, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } else { // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to // scavenge a register here instead of reserving G1 all of the time. @@ -104,8 +99,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) .addReg(SP::I6); // Insert: G1+%lo(offset) into the user. - MI.getOperand(i).ChangeToRegister(SP::G1, false); - MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); + MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1)); } } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 9515ad3..37bb4d5 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -41,7 +41,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { MachineBasicBlock::iterator I) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1dd1e41..02da140 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -544,20 +544,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr; unsigned Opc = MI.getOpcode(); @@ -573,7 +567,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. - MI.getOperand(i).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. int FIOffset; @@ -584,17 +578,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); - if (MI.getOperand(i+3).isImm()) { + if (MI.getOperand(FIOperandNum+3).isImm()) { // Offset is a 32-bit integer. 
- int Imm = (int)(MI.getOperand(i + 3).getImm()); + int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); int Offset = FIOffset + Imm; assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && "Requesting 64-bit offset in 32-bit immediate!"); - MI.getOperand(i + 3).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); - MI.getOperand(i+3).setOffset(Offset); + uint64_t Offset = FIOffset + + (uint64_t)MI.getOperand(FIOperandNum+3).getOffset(); + MI.getOperand(FIOperandNum + 3).setOffset(Offset); } } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 7932ede..5b45e9e 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -122,7 +122,8 @@ public: MachineBasicBlock::iterator MI) const; void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 7e3e476..01749a8 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -155,18 +155,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); - unsigned i = 0; - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - MachineOperand &FrameOp = MI.getOperand(i); + MachineOperand &FrameOp = MI.getOperand(FIOperandNum); int FrameIndex = FrameOp.getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); @@ -190,14 +184,14 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of DBG_VALUE instructions. if (MI.isDebugValue()) { - MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } // fold constant into offset. - Offset += MI.getOperand(i + 1).getImm(); - MI.getOperand(i + 1).ChangeToImmediate(0); + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); assert(Offset%4 == 0 && "Misaligned stack offset"); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index c4dcb6b..62549a8 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -59,7 +59,8 @@ public: MachineBasicBlock::iterator I) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. 
   unsigned getFrameRegister(const MachineFunction &MF) const;
-- 
cgit v1.1

From 8b15d74434dfde5ea9227b985c2c934f62bf03a7 Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Thu, 31 Jan 2013 20:05:14 +0000
Subject: [Dwarf] early exit to avoid creating dangling DIEs

We used to create children DIEs for a scope, then check whether ScopeDIE is
null. If ScopeDIE is null, the children DIEs will be dangling. Other DIEs can
link to those dangling DIEs, which are not emitted at all, causing a DWARF
error.

The current test case is 4k lines, from MultiSource/BenchMark/McCat/09-vor.

rdar://problem/13071959

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174084 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f3c7a2a..13ad353 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -540,6 +540,12 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
     return NULL;
 
+  DIScope DS(Scope->getScopeNode());
+  // Early return to avoid creating dangling variable|scope DIEs.
+  if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
+      !TheCU->getDIE(DS))
+    return NULL;
+
   SmallVector<DIE *, 8> Children;
   DIE *ObjectPointer = NULL;
 
@@ -565,7 +571,6 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
     for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
       if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
         Children.push_back(Nested);
-  DIScope DS(Scope->getScopeNode());
   DIE *ScopeDIE = NULL;
   if (Scope->getInlinedAt())
     ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
-- 
cgit v1.1

From 2d95e43fd877f81f419485baed3bd559de55e4c4 Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Thu, 31 Jan 2013 20:23:43 +0000
Subject: When lowering memcpys to loads and stores, make sure we don't
 promote alignments past the natural stack alignment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174085 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b672926..5724122 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3561,6 +3561,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
     unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+    // Don't promote to an alignment that would require dynamic stack
+    // realignment.
+    const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+    if (!TRI->needsStackRealignment(MF))
+      while (NewAlign > Align &&
+             TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+        NewAlign /= 2;
+
     if (NewAlign > Align) {
       // Give the stack frame object a larger alignment if needed.
       if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
-- 
cgit v1.1

From a9da5c50e246966c04784756e2083dbfe606c448 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Thu, 31 Jan 2013 20:46:53 +0000
Subject: Update AArch64 backend to changed eliminateFrameIndex interface.
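In brief, the override gains the new FIOperandNum parameter and drops its
local operand scan; a condensed sketch of the resulting signature (the full
diff follows):

    void AArch64RegisterInfo::eliminateFrameIndex(
        MachineBasicBlock::iterator MBBI, int SPAdj, unsigned FIOperandNum,
        RegScavenger *RS) const;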
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174086 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64RegisterInfo.cpp | 22 +++++++++-------------
 lib/Target/AArch64/AArch64RegisterInfo.h   |  1 +
 2 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ce66504..2481176 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -78,7 +78,9 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
 void
 AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
-                                         int SPAdj, RegScavenger *RS) const {
+                                         int SPAdj,
+                                         unsigned FIOperandNum,
+                                         RegScavenger *RS) const {
   assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
@@ -87,12 +89,6 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
   const AArch64FrameLowering *TFI =
    static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
 
-  unsigned i = 0;
-  while (!MI.getOperand(i).isFI()) {
-    ++i;
-    assert(i < MI.getNumOperands() && "Instr doesn't have a FrameIndex Operand");
-  }
-
   // In order to work out the base and offset for addressing, the FrameLowering
   // code needs to know (sometimes) whether the instruction is storing/loading a
   // callee-saved register, or whether it's a more generic
@@ -107,7 +103,7 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
   }
 
-  int FrameIndex = MI.getOperand(i).getIndex();
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
 
   unsigned FrameReg;
@@ -115,13 +111,13 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
   Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
                                            IsCalleeSaveOp);
 
-  Offset += MI.getOperand(i+1).getImm();
+  Offset += MI.getOperand(FIOperandNum + 1).getImm();
 
   // DBG_VALUE instructions have no real restrictions so they can be handled
   // easily.
   if (MI.isDebugValue()) {
-    MI.getOperand(i).ChangeToRegister(FrameReg, /*isDef=*/ false);
-    MI.getOperand(i+1).ChangeToImmediate(Offset);
+    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
     return;
   }
 
@@ -151,8 +147,8 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
   // now this checks nothing has gone horribly wrong.
   assert(Offset >= 0 && "Unexpected negative offset from SP");
 
-  MI.getOperand(i).ChangeToRegister(FrameReg, false, false, true);
-  MI.getOperand(i+1).ChangeToImmediate(Offset / OffsetScale);
+  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
 }
 
 void
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index ea538e2..3be083d 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -41,6 +41,7 @@ public:
   unsigned getFrameRegister(const MachineFunction &MF) const;
 
   void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                           unsigned FIOperandNum,
                            RegScavenger *Rs = NULL) const;
 
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
-- 
cgit v1.1

From 3ab115ce8f5262608630d67c28707dbd24361d03 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Thu, 31 Jan 2013 20:57:50 +0000
Subject: Add braces, so my head doesn't explode.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174088 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/PrologEpilogInserter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 87a6528..bca3ede3 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -751,7 +751,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
       MachineInstr *MI = I;
       bool DoIncr = true;
 
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         if (MI->getOperand(i).isFI()) {
           // Some instructions (e.g. inline asm instructions) can have
           // multiple frame indices and/or cause eliminateFrameIndex
@@ -778,6 +778,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
           MI = 0;
           break;
         }
+      }
 
       if (DoIncr && I != BB->end()) ++I;
-- 
cgit v1.1

From 14292a6be51ab57ff425ff263d4134fe46d082c4 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Thu, 31 Jan 2013 20:59:05 +0000
Subject: Add support for emitting a string attribute.

Attributes that are strings are typically target-dependent attributes. They
are of this form in the IR:

  "attr"
  "attr" = "val"

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174090 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/AttributeImpl.h |   7 +--
 lib/IR/Attributes.cpp  | 115 +++++++++++++++++++++++++++------------------
 2 files changed, 77 insertions(+), 45 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index af9d4fa..442860d 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -45,23 +45,24 @@ public:
                 ArrayRef<Constant *> values);
   AttributeImpl(LLVMContext &C, StringRef data);
 
+  LLVMContext &getContext() { return Context; }
+
   bool hasAttribute(Attribute::AttrKind A) const;
 
   Constant *getAttributeKind() const { return Kind; }
   ArrayRef<Constant *> getAttributeValues() const { return Vals; }
 
-  LLVMContext &getContext() { return Context; }
-  ArrayRef<Constant *> getValues() const { return Vals; }
-
   uint64_t getAlignment() const;
   uint64_t getStackAlignment() const;
 
+  /// \brief Equality and non-equality comparison operators.
   bool operator==(Attribute::AttrKind Kind) const;
   bool operator!=(Attribute::AttrKind Kind) const;
 
   bool operator==(StringRef Kind) const;
   bool operator!=(StringRef Kind) const;
 
+  /// \brief Used when sorting the attributes.
bool operator<(const AttributeImpl &AI) const; void Profile(FoldingSetNodeID &ID) const { diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 3a8cfe5..5608cbd 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -106,60 +106,70 @@ unsigned Attribute::getStackAlignment() const { } std::string Attribute::getAsString() const { - if (hasAttribute(Attribute::ZExt)) - return "zeroext"; - if (hasAttribute(Attribute::SExt)) - return "signext"; - if (hasAttribute(Attribute::NoReturn)) - return "noreturn"; - if (hasAttribute(Attribute::NoUnwind)) - return "nounwind"; - if (hasAttribute(Attribute::UWTable)) - return "uwtable"; - if (hasAttribute(Attribute::ReturnsTwice)) - return "returns_twice"; + if (!pImpl) return ""; + + if (hasAttribute(Attribute::AddressSafety)) + return "address_safety"; + if (hasAttribute(Attribute::AlwaysInline)) + return "alwaysinline"; + if (hasAttribute(Attribute::ByVal)) + return "byval"; + if (hasAttribute(Attribute::InlineHint)) + return "inlinehint"; if (hasAttribute(Attribute::InReg)) return "inreg"; + if (hasAttribute(Attribute::MinSize)) + return "minsize"; + if (hasAttribute(Attribute::Naked)) + return "naked"; + if (hasAttribute(Attribute::Nest)) + return "nest"; if (hasAttribute(Attribute::NoAlias)) return "noalias"; if (hasAttribute(Attribute::NoCapture)) return "nocapture"; - if (hasAttribute(Attribute::StructRet)) - return "sret"; - if (hasAttribute(Attribute::ByVal)) - return "byval"; - if (hasAttribute(Attribute::Nest)) - return "nest"; + if (hasAttribute(Attribute::NoDuplicate)) + return "noduplicate"; + if (hasAttribute(Attribute::NoImplicitFloat)) + return "noimplicitfloat"; + if (hasAttribute(Attribute::NoInline)) + return "noinline"; + if (hasAttribute(Attribute::NonLazyBind)) + return "nonlazybind"; + if (hasAttribute(Attribute::NoRedZone)) + return "noredzone"; + if (hasAttribute(Attribute::NoReturn)) + return "noreturn"; + if (hasAttribute(Attribute::NoUnwind)) + return "nounwind"; + if (hasAttribute(Attribute::OptimizeForSize)) + return "optsize"; if (hasAttribute(Attribute::ReadNone)) return "readnone"; if (hasAttribute(Attribute::ReadOnly)) return "readonly"; - if (hasAttribute(Attribute::OptimizeForSize)) - return "optsize"; - if (hasAttribute(Attribute::NoInline)) - return "noinline"; - if (hasAttribute(Attribute::InlineHint)) - return "inlinehint"; - if (hasAttribute(Attribute::AlwaysInline)) - return "alwaysinline"; + if (hasAttribute(Attribute::ReturnsTwice)) + return "returns_twice"; + if (hasAttribute(Attribute::SExt)) + return "signext"; if (hasAttribute(Attribute::StackProtect)) return "ssp"; if (hasAttribute(Attribute::StackProtectReq)) return "sspreq"; if (hasAttribute(Attribute::StackProtectStrong)) return "sspstrong"; - if (hasAttribute(Attribute::NoRedZone)) - return "noredzone"; - if (hasAttribute(Attribute::NoImplicitFloat)) - return "noimplicitfloat"; - if (hasAttribute(Attribute::Naked)) - return "naked"; - if (hasAttribute(Attribute::NonLazyBind)) - return "nonlazybind"; - if (hasAttribute(Attribute::AddressSafety)) - return "address_safety"; - if (hasAttribute(Attribute::MinSize)) - return "minsize"; + if (hasAttribute(Attribute::StructRet)) + return "sret"; + if (hasAttribute(Attribute::UWTable)) + return "uwtable"; + if (hasAttribute(Attribute::ZExt)) + return "zeroext"; + + // FIXME: These should be output like this: + // + // align=4 + // alignstack=8 + // if (hasAttribute(Attribute::StackAlignment)) { std::string Result; Result += "alignstack("; @@ -171,17 +181,38 @@ std::string 
Attribute::getAsString() const { std::string Result; Result += "align "; Result += utostr(getAlignment()); - Result += ""; return Result; } - if (hasAttribute(Attribute::NoDuplicate)) - return "noduplicate"; + + // Convert target-dependent attributes to strings of the form: + // + // "kind" + // "kind" = "value" + // "kind" = ("value1" "value2" "value3" ) + // + if (ConstantDataArray *CDA = + dyn_cast(pImpl->getAttributeKind())) { + std::string Result; + Result += '\"' + CDA->getAsString().str() + '"'; + + ArrayRef Vals = pImpl->getAttributeValues(); + if (Vals.empty()) return Result; + Result += " = "; + if (Vals.size() > 1) Result += '('; + for (ArrayRef::iterator I = Vals.begin(), E = Vals.end(); + I != E; ) { + ConstantDataArray *CDA = cast(*I++); + Result += '\"' + CDA->getAsString().str() + '"'; + if (I != E) Result += ' '; + } + if (Vals.size() > 1) Result += ')'; + } llvm_unreachable("Unknown attribute"); } bool Attribute::operator==(AttrKind K) const { - return pImpl && *pImpl == K; + return (pImpl && *pImpl == K) || (!pImpl && K == None); } bool Attribute::operator!=(AttrKind K) const { return !(*this == K); -- cgit v1.1 From 16d1a6bf082e7dbda4de2e5a54b9e8caae990c61 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Thu, 31 Jan 2013 21:19:18 +0000 Subject: Linker: correctly link in dbg.declare This is a re-worked version of r174048. Given source IR: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15 we used to generate call void @llvm.dbg.declare(metadata !27, metadata !28), !dbg !29 !27 = metadata !{null} With this patch, we will correctly generate call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !27), !dbg !28 Looking up %argc.addr in ValueMap will return null, since %argc.addr is already correctly set up, we can use identity mapping. rdar://problem/13089880 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174093 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/ValueMapper.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index a5e1643..b5941bd 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -63,14 +63,29 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; + if (OP == 0) continue; + Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper); + // Use identity map if Mapped_Op is null and we can ignore missing + // entries. + if (Mapped_OP == OP || + (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + continue; // Ok, at least one operand needs remapping. SmallVector Elts; Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0); + if (Op == 0) + Elts.push_back(0); + else { + Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper); + // Use identity map if Mapped_Op is null and we can ignore missing + // entries. 
+ if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + Mapped_Op = Op; + Elts.push_back(Mapped_Op); + } } MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); -- cgit v1.1 From a6d8e19c02e4f96c673dbb256d922ed65e3ddb6f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 31 Jan 2013 21:23:44 +0000 Subject: Remove unused variable, which should have been removed with r174083. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430RegisterInfo.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index bb0f660..6e0a223 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -167,7 +167,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); -- cgit v1.1 From 44ddc362542e2530e51f3269dcb3e0b82362acea Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 31 Jan 2013 22:11:46 +0000 Subject: R600: Make store_dummy intrinsic more general by passing export type Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174097 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 9 +++++++-- lib/Target/R600/R600Intrinsics.td | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 04b83bc..86ee0bb 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -589,9 +589,14 @@ multiclass ExportPattern cf_inst> { 0, 61, 7, 0, 7, 7, cf_inst, 0) >; - def : Pat<(int_R600_store_pixel_dummy), + def : Pat<(int_R600_store_dummy (i32 imm:$type)), (ExportInst - (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0) + (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(int_R600_store_dummy 1), + (ExportInst + (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) >; def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index 1394a85..9d416a6 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -29,6 +29,6 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[], [llvm_float_ty], []>; def int_R600_store_pixel_stencil : Intrinsic<[], [llvm_float_ty], []>; - def int_R600_store_pixel_dummy : - Intrinsic<[], [], []>; + def int_R600_store_dummy : + Intrinsic<[], [llvm_i32_ty], []>; } -- cgit v1.1 From cacbcb0f2c60d45618dee0e10ded2ed2052166a6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 31 Jan 2013 22:11:53 +0000 Subject: R600: Consider bitcast when folding const_address node. 
Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174098 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 3 +++ lib/Target/R600/R600ISelLowering.cpp | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 567b3e2..ece26ef 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -318,6 +318,9 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, } } break; + case ISD::BITCAST: + Ops[OperandIdx[i] - 1] = Operand.getOperand(0); + return true; default: break; } diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 3dc5b00..ff18a44 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -991,6 +991,14 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, return Arg->getOperand(Element); } } + if (Arg.getOpcode() == ISD::BITCAST && + Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + if (ConstantSDNode *Const = dyn_cast(N->getOperand(1))) { + unsigned Element = Const->getZExtValue(); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(), + Arg->getOperand(0).getOperand(Element)); + } + } } } return SDValue(); -- cgit v1.1 From 4bdf9890edd91b82487a29ae134d53676829bd0d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 31 Jan 2013 22:11:54 +0000 Subject: R600: Fold clamp, neg, abs Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174099 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 51 ++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index ece26ef..84223f6 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -272,7 +272,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { const R600InstrInfo *TII = static_cast(TM.getInstrInfo()); - if (Result && TII->isALUInstr(Result->getMachineOpcode())) { + if (Result && Result->isMachineOpcode() + && TII->isALUInstr(Result->getMachineOpcode())) { + // Fold FNEG/FABS/CONST_ADDRESS + // TODO: Isel can generate multiple MachineInst, we need to recursively + // parse Result bool IsModified = false; do { std::vector Ops; @@ -281,10 +285,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { Ops.push_back(*I); IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops); if (IsModified) { - Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(), - Result->getVTList(), Ops.data(), Ops.size()); + Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); } } while (IsModified); + + // If node has a single use which is CLAMP_R600, folds it + if (Result->hasOneUse() && Result->isMachineOpcode()) { + SDNode *PotentialClamp = *Result->use_begin(); + if (PotentialClamp->isMachineOpcode() && + PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) { + unsigned ClampIdx = + TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP); + std::vector Ops; + unsigned NumOp = Result->getNumOperands(); + for (unsigned i = 0; i < NumOp; ++i) { + Ops.push_back(Result->getOperand(i)); + } + Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32); + Result = 
CurDAG->SelectNodeTo(PotentialClamp, + Result->getMachineOpcode(), PotentialClamp->getVTList(), + Ops.data(), NumOp); + } + } } } @@ -303,6 +325,17 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL), TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL) }; + int NegIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG), + TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG), + TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG) + }; + int AbsIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS), + TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS), + -1 + }; + for (unsigned i = 0; i < 3; i++) { if (OperandIdx[i] < 0) return false; @@ -318,6 +351,18 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, } } break; + case ISD::FNEG: + if (NegIdx[i] < 0) + break; + Ops[OperandIdx[i] - 1] = Operand.getOperand(0); + Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32); + return true; + case ISD::FABS: + if (AbsIdx[i] < 0) + break; + Ops[OperandIdx[i] - 1] = Operand.getOperand(0); + Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32); + return true; case ISD::BITCAST: Ops[OperandIdx[i] - 1] = Operand.getOperand(0); return true; -- cgit v1.1 From 1f560aac96b1cabb8da4689b3fb26cc585363093 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 31 Jan 2013 22:55:51 +0000 Subject: Update AMDGPURegisterInfo::eliminateFrameIndex() corresponding to r174083. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174106 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPURegisterInfo.cpp | 1 + lib/Target/R600/AMDGPURegisterInfo.h | 1 + 2 files changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index eeafec8..b332905 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -38,6 +38,7 @@ const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS) const { assert(!"Subroutines not supported yet"); } diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 76ee7ae..4a4bead 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -53,6 +53,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS) const; unsigned getFrameRegister(const MachineFunction &MF) const; -- cgit v1.1 From 169d5270751597aed4095ead00401a3374906147 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 23:16:25 +0000 Subject: Remove the AttrBuilder form of the Attribute::get creators. The AttrBuilder is for building a collection of attributes. The Attribute object holds only one attribute. So it's not really useful for the Attribute object to have a creator which takes an AttrBuilder. This has two fallouts: 1. The AttrBuilder no longer holds its internal attributes in a bit-mask form. 2. The attributes are now ordered alphabetically (hence why the tests have changed). 
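
Both fallouts are easier to see in a standalone sketch than in the hunks themselves. Everything below is invented for illustration; the kind names, class names, and container choice are stand-ins, not the in-tree AttrBuilder or AttributeImpl definitions. The old scheme packs one attribute per bit of a 64-bit mask, while the new one keeps distinct kinds in an ordered container, which is what makes iteration (and therefore printing) deterministic; in the real code the resulting order is alphabetical by name.

    #include <cstdint>
    #include <set>

    enum AttrKind { AlwaysInline, ByVal, NoAlias, ZExt };  // hypothetical kinds

    // Old scheme: one bit per kind, packed into a single word. Every query is
    // shift-and-mask arithmetic, and iteration order is bit position.
    struct MaskBuilder {
      uint64_t Bits;
      MaskBuilder() : Bits(0) {}
      void add(AttrKind K) { Bits |= 1ULL << K; }
      bool contains(AttrKind K) const { return (Bits >> K) & 1; }
    };

    // New scheme: an ordered container of kinds; iteration follows a
    // deterministic sorted order instead of whatever the bit layout was.
    struct SetBuilder {
      std::set<AttrKind> Attrs;
      void add(AttrKind K) { Attrs.insert(K); }
      bool contains(AttrKind K) const { return Attrs.count(K) != 0; }
    };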
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174110 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 2 ++ lib/IR/Attributes.cpp | 70 ++++++++++++++++++++++++++------------------------ 2 files changed, 38 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 442860d..e952578 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -40,6 +40,8 @@ class AttributeImpl : public FoldingSetNode { public: AttributeImpl(LLVMContext &C, Constant *Kind) : Context(C), Kind(Kind) {} + AttributeImpl(LLVMContext &C, Constant *Kind, ArrayRef Vals) + : Context(C), Kind(Kind), Vals(Vals.begin(), Vals.end()) {} explicit AttributeImpl(LLVMContext &C, Attribute::AttrKind data); AttributeImpl(LLVMContext &C, Attribute::AttrKind data, ArrayRef values); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 5608cbd..c2ea2b2 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -30,24 +30,11 @@ using namespace llvm; // Attribute Construction Methods //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, AttrKind Kind) { - AttrBuilder B; - return Attribute::get(Context, B.addAttribute(Kind)); -} - -Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { - // If there are no attributes, return an empty Attribute class. - if (!B.hasAttributes()) - return Attribute(); - - assert(std::distance(B.begin(), B.end()) == 1 && - "The Attribute object should represent one attribute only!"); - - // Otherwise, build a key to look up the existing attributes. +Attribute Attribute::get(LLVMContext &Context, Constant *Kind, Constant *Val) { LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ConstantInt *CI = ConstantInt::get(Type::getInt64Ty(Context), B.Raw()); - ID.AddPointer(CI); + ID.AddPointer(Kind); + if (Val) ID.AddPointer(Val); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -55,7 +42,9 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributeImpl(Context, CI); + PA = (!Val) ? 
+ new AttributeImpl(Context, Kind) : + new AttributeImpl(Context, Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -63,15 +52,24 @@ Attribute Attribute::get(LLVMContext &Context, AttrBuilder &B) { return Attribute(PA); } +Attribute Attribute::get(LLVMContext &Context, AttrKind Kind, Constant *Val) { + ConstantInt *KindVal = ConstantInt::get(Type::getInt64Ty(Context), Kind); + return get(Context, KindVal, Val); +} + Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { - AttrBuilder B; - return get(Context, B.addAlignmentAttr(Align)); + assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); + assert(Align <= 0x40000000 && "Alignment too large."); + return get(Context, Alignment, + ConstantInt::get(Type::getInt64Ty(Context), Align)); } Attribute Attribute::getWithStackAlignment(LLVMContext &Context, uint64_t Align) { - AttrBuilder B; - return get(Context, B.addStackAlignmentAttr(Align)); + assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); + assert(Align <= 0x100 && "Alignment too large."); + return get(Context, StackAlignment, + ConstantInt::get(Type::getInt64Ty(Context), Align)); } //===----------------------------------------------------------------------===// @@ -250,17 +248,21 @@ AttributeImpl::AttributeImpl(LLVMContext &C, StringRef kind) } bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { - return (Raw() & getAttrMask(A)) != 0; + if (ConstantInt *CI = dyn_cast(Kind)) + return CI->getZExtValue() == A; + return false; } uint64_t AttributeImpl::getAlignment() const { - uint64_t Mask = Raw() & getAttrMask(Attribute::Alignment); - return 1ULL << ((Mask >> 16) - 1); + assert(hasAttribute(Attribute::Alignment) && + "Trying to retrieve the alignment from a non-alignment attr!"); + return cast(Vals[0])->getZExtValue(); } uint64_t AttributeImpl::getStackAlignment() const { - uint64_t Mask = Raw() & getAttrMask(Attribute::StackAlignment); - return 1ULL << ((Mask >> 26) - 1); + assert(hasAttribute(Attribute::StackAlignment) && + "Trying to retrieve the stack alignment from a non-alignment attr!"); + return cast(Vals[0])->getZExtValue(); } bool AttributeImpl::operator==(Attribute::AttrKind kind) const { @@ -808,12 +810,15 @@ void AttrBuilder::clear() { } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { + assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment && + "Adding alignment attribute without adding alignment value!"); Attrs.insert(Val); return *this; } AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { Attrs.erase(Val); + if (Val == Attribute::Alignment) Alignment = 0; else if (Val == Attribute::StackAlignment) @@ -823,16 +828,13 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) { - uint64_t Mask = Attr.Raw(); - - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) - if ((Mask & AttributeImpl::getAttrMask(I)) != 0) - Attrs.insert(I); + ConstantInt *Kind = cast(Attr.getAttributeKind()); + Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue()); + Attrs.insert(KindVal); - if (Attr.getAlignment()) + if (KindVal == Attribute::Alignment) Alignment = Attr.getAlignment(); - if (Attr.getStackAlignment()) + else if (KindVal == Attribute::StackAlignment) StackAlignment = Attr.getStackAlignment(); return *this; } -- cgit v1.1 From 5da3665cc501ed8928e63678254357214ec0b9eb Mon Sep 17 00:00:00 2001 From: Chandler Carruth 
Date: Thu, 31 Jan 2013 23:29:57 +0000 Subject: Give the MCStreamer class hierarchy LLVM RTTI facilities for use with isa<> and dyn_cast<>. In several places, code is already hacking around the absence of this, and there seem to be several interfaces that might be lifted and/or devirtualized using this. This change was based on a discussion with Jim Grosbach about how best to handle testing for specific MCStreamer subclasses. He said that this was the correct end state, and everything else was too hacky so I decided to just make it so. No functionality should be changed here, this is just threading the kind through all the constructors and setting up the classof overloads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174113 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAsmStreamer.cpp | 6 +++++- lib/MC/MCMachOStreamer.cpp | 10 +++++++--- lib/MC/MCNullStreamer.cpp | 7 ++++++- lib/MC/MCObjectStreamer.cpp | 27 ++++++++++++-------------- lib/MC/MCPureStreamer.cpp | 10 +++++++--- lib/MC/MCStreamer.cpp | 7 +++---- lib/MC/WinCOFFStreamer.cpp | 15 +++++++------- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 15 ++++++++------ 8 files changed, 57 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7191947..71be1a9 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -71,7 +71,7 @@ public: MCInstPrinter *printer, MCCodeEmitter *emitter, MCAsmBackend *asmbackend, bool showInst) - : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + : MCStreamer(SK_AsmStreamer, Context), OS(os), MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI), @@ -277,6 +277,10 @@ public: virtual void FinishImpl(); /// @} + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_NullStreamer; + } }; } // end anonymous namespace. diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 2c0c97a..7d08d0e 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -34,9 +34,9 @@ private: void EmitDataRegion(DataRegionData::KindTy Kind); void EmitDataRegionEnd(); public: - MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, MAB, OS, Emitter) {} + MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *Emitter) + : MCObjectStreamer(SK_MachOStreamer, Context, MAB, OS, Emitter) {} /// @name MCStreamer Interface /// @{ @@ -87,6 +87,10 @@ public: virtual void FinishImpl(); /// @} + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_MachOStreamer; + } }; } // end anonymous namespace. 
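
The classof hooks threaded through these streamer classes follow LLVM's hand-rolled RTTI recipe. Here is a minimal sketch of that recipe under invented names; Streamer, AsmStreamer, and dyn_cast_sketch are illustrative stand-ins, not the real MC hierarchy or llvm::dyn_cast. The base class carries a kind tag that every subclass constructor supplies, classof tests the tag, and the cast helper turns that test into a checked pointer conversion.

    struct Streamer {
      enum StreamerKind { SK_Asm, SK_Obj };   // one tag per concrete subclass
      StreamerKind getKind() const { return K; }
    protected:
      Streamer(StreamerKind K) : K(K) {}      // subclasses thread their tag through
    private:
      StreamerKind K;
    };

    struct AsmStreamer : Streamer {
      AsmStreamer() : Streamer(SK_Asm) {}
      static bool classof(const Streamer *S) { return S->getKind() == SK_Asm; }
    };

    // What dyn_cast does in miniature: test the tag, return null on mismatch.
    template <typename To, typename From>
    To *dyn_cast_sketch(From *F) {
      return To::classof(F) ? static_cast<To *>(F) : 0;
    }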
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 3eee5ca..89f74c1 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -19,7 +19,7 @@ namespace { class MCNullStreamer : public MCStreamer { public: - MCNullStreamer(MCContext &Context) : MCStreamer(Context) {} + MCNullStreamer(MCContext &Context) : MCStreamer(SK_NullStreamer, Context) {} /// @name MCStreamer Interface /// @{ @@ -109,6 +109,11 @@ namespace { } /// @} + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_NullStreamer; + } + }; } diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 6f2dce6..5aa4b40 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -20,22 +20,19 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; -MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter_) - : MCStreamer(Context), - Assembler(new MCAssembler(Context, TAB, - *Emitter_, *TAB.createObjectWriter(OS), - OS)), - CurSectionData(0) -{ -} - -MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter_, +MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context, + MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter_) + : MCStreamer(Kind, Context), + Assembler(new MCAssembler(Context, TAB, *Emitter_, + *TAB.createObjectWriter(OS), OS)), + CurSectionData(0) {} + +MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context, + MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter_, MCAssembler *_Assembler) - : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0) -{ -} + : MCStreamer(Kind, Context), Assembler(_Assembler), CurSectionData(0) {} MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp index 6ce7ae8..573308a 100644 --- a/lib/MC/MCPureStreamer.cpp +++ b/lib/MC/MCPureStreamer.cpp @@ -28,9 +28,9 @@ private: virtual void EmitInstToData(const MCInst &Inst); public: - MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter) {} + MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter) + : MCObjectStreamer(SK_PureStreamer, Context, TAB, OS, Emitter) {} /// @name MCStreamer Interface /// @{ @@ -100,6 +100,10 @@ public: } /// @} + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_PureStreamer; + } }; } // end anonymous namespace. 
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 00ebde3..e92569b 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -21,10 +21,9 @@ #include using namespace llvm; -MCStreamer::MCStreamer(MCContext &Ctx) - : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), - CurrentW64UnwindInfo(0), LastSymbol(0), - AutoInitSections(false) { +MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) + : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), + CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { const MCSection *section = NULL; SectionStack.push_back(std::make_pair(section, section)); } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index b529489b..75f343c 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -75,6 +75,10 @@ public: virtual void EmitWin64EHHandlerData(); virtual void FinishImpl(); + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_WinCOFFStreamer; + } + private: virtual void EmitInstToData(const MCInst &Inst) { MCDataFragment *DF = getOrCreateDataFragment(); @@ -128,13 +132,10 @@ private: }; } // end anonymous namespace. -WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, - MCCodeEmitter &CE, - raw_ostream &OS) - : MCObjectStreamer(Context, MAB, OS, &CE) - , CurSymbol(NULL) { -} +WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &CE, raw_ostream &OS) + : MCObjectStreamer(SK_WinCOFFStreamer, Context, MAB, OS, &CE), + CurSymbol(NULL) {} void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, bool External) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 526f571..418971d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -54,12 +54,11 @@ namespace { /// by MachO. Beware! class ARMELFStreamer : public MCELFStreamer { public: - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(Context, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), - ExTab(0), FnStart(0), Personality(0), CantUnwind(false) { - } + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool IsThumb) + : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0), + FnStart(0), Personality(0), CantUnwind(false) {} ~ARMELFStreamer() {} @@ -134,6 +133,10 @@ public: } } + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_ARMELFStreamer; + } + private: enum ElfMappingSymbol { EMS_None, -- cgit v1.1 From 39da078977ae98b6bf1c3c76a472ed24f5f2a2d2 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 31 Jan 2013 23:38:01 +0000 Subject: s/AttrBuilder::addAttributes/AttrBuilder::addAttribute/g because that's more descriptive of what it actually is. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174116 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Attributes.cpp | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index c2ea2b2..14aba08 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -437,7 +437,7 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const {
     for (AttributeSetNode::const_iterator II = ASN->begin(),
            IE = ASN->end(); II != IE; ++II)
-      B.addAttributes(*II);
+      B.addAttribute(*II);
     return B.Raw();
 }
 
@@ -596,7 +596,7 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
     if (Attrs.getSlotIndex(I) == Idx) {
       for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I),
              IE = Attrs.pImpl->end(I); II != IE; ++II)
-        B.addAttributes(*II);
+        B.addAttribute(*II);
       break;
     }
 
@@ -798,7 +798,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx)
       for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
              IE = pImpl->end(I); II != IE; ++II)
-        addAttributes(*II);
+        addAttribute(*II);
       break;
     }
 
@@ -816,18 +816,7 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
   return *this;
 }
 
-AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
-  Attrs.erase(Val);
-
-  if (Val == Attribute::Alignment)
-    Alignment = 0;
-  else if (Val == Attribute::StackAlignment)
-    StackAlignment = 0;
-
-  return *this;
-}
-
-AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) {
+AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
   ConstantInt *Kind = cast<ConstantInt>(Attr.getAttributeKind());
   Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue());
   Attrs.insert(KindVal);
@@ -839,6 +828,17 @@ AttrBuilder &AttrBuilder::addAttributes(Attribute Attr) {
   return *this;
 }
 
+AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
+  Attrs.erase(Val);
+
+  if (Val == Attribute::Alignment)
+    Alignment = 0;
+  else if (Val == Attribute::StackAlignment)
+    StackAlignment = 0;
+
+  return *this;
+}
+
 AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
   uint64_t Mask = A.Raw(Index);
-- cgit v1.1 

From 27aacedf7d975243170206efb948a20d6fd4a2c1 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Thu, 31 Jan 2013 23:43:14 +0000
Subject: Switch the code added in r173885 to use the new, shiny RTTI
 infrastructure on MCStreamer to test for whether there is an MCELFStreamer
 object available.

This is just a cleanup on the AsmPrinter side of things, moving ad-hoc tests
of random APIs to a direct type query. But the AsmParser was completely
broken. There were no tests, it just blindly cast its streamer to an
MCELFStreamer and started manipulating it.

I don't have a test case -- this actually failed on LLVM's own regression
test suite. Unfortunately the failure only appears when the stars, compilers,
and runtime align to misbehave when we read a pointer to a
formatted_raw_ostream as if it were an MCAssembler. =/ UBSan would catch this
immediately.

Many thanks to Matt for doing about 80% of the debugging work here in GDB,
Jim for helping to explain how exactly to fix this, and others for putting up
with the hair pulling that ensued during debugging it.
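
The bug and its fix have the classic unchecked-downcast shape. Continuing the toy Streamer hierarchy from the sketch a few commits back, and adding a hypothetical ObjStreamer subclass (none of this is the actual MC code), the before and after look roughly like this:

    struct ObjStreamer : Streamer {
      ObjStreamer() : Streamer(SK_Obj), Flags(0) {}
      static bool classof(const Streamer *S) { return S->getKind() == SK_Obj; }
      void setHeaderFlags(unsigned F) { Flags = F; }
      unsigned Flags;
    };

    // Before: asserts the dynamic type by fiat; a wrong-typed streamer means
    // reinterpreting unrelated memory, exactly the failure described above.
    void setFlagsBlindly(Streamer &S) {
      static_cast<ObjStreamer &>(S).setHeaderFlags(0x05);
    }

    // After: the kind tag is checked first, so a mismatched streamer is
    // simply skipped instead of being corrupted.
    void setFlagsChecked(Streamer &S) {
      if (ObjStreamer *OS = dyn_cast_sketch<ObjStreamer>(&S))
        OS->setHeaderFlags(0x05);
    }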
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174118 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMAsmPrinter.cpp          | 8 ++------
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6 +++---
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 577cdb0..986dfb7 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -704,12 +704,8 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
   // FIXME: This should eventually end up somewhere else where more
   // intelligent flag decisions can be made. For now we are just maintaining
   // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
-  if (Subtarget->isTargetELF()) {
-    if (OutStreamer.hasRawTextSupport()) return;
-
-    MCELFStreamer &MES = static_cast<MCELFStreamer &>(OutStreamer);
-    MES.getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
-  }
+  if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
+    MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 77620e9..106fd13 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -257,9 +257,9 @@ public:
     // Set ELF header flags.
     // FIXME: This should eventually end up somewhere else where more
     // intelligent flag decisions can be made. For now we are just maintaining
-    // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
-    MCELFStreamer &MES = static_cast<MCELFStreamer &>(Parser.getStreamer());
-    MES.getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+    // the statu/parseDirects quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+    if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
+      MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
   }
 
   // Implementation of the MCTargetAsmParser interface:
-- cgit v1.1 

From ac08785edaac826afe11a4e596a46fc693848848 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Thu, 31 Jan 2013 23:49:33 +0000
Subject: An alloca can be equal to an argument.

An argument can't *alias* an alloca, but it can still be equal to one, since
there's nothing preventing a caller from correctly predicting the stack
location of an alloca.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174119 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/InstructionSimplify.cpp | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index d5e38e5..f8e76ca 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1828,18 +1828,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         else if (Pred == CmpInst::ICMP_NE)
           return ConstantInt::get(ITy, true);
       }
-    } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) {
-      RHSPtr = RHSPtr->stripInBoundsOffsets();
-      // An alloca can't be equal to an argument unless they come from separate
-      // functions via inlining.
-      if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) {
-        if (LHSArg->getParent() == RHSInst->getParent()->getParent()) {
-          if (Pred == CmpInst::ICMP_EQ)
-            return ConstantInt::get(ITy, false);
-          else if (Pred == CmpInst::ICMP_NE)
-            return ConstantInt::get(ITy, true);
-        }
-      }
     }
 
   // If we are comparing with zero then try hard since this is a common case.
-- cgit v1.1 

From 16c4b3cf2943ae2327752cf3de39769d14cfcece Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Thu, 31 Jan 2013 23:53:05 +0000
Subject: Add iterators to the AttributeSet class so that we can access the
 Attributes in a nice way.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174120 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/Attributes.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'lib')

diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 14aba08..9d5f53b 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -740,6 +740,18 @@ AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const {
   return 0;
 }
 
+AttributeSet::iterator AttributeSet::begin(unsigned Idx) {
+  if (!pImpl)
+    return ArrayRef<Attribute>().begin();
+  return pImpl->begin(Idx);
+}
+
+AttributeSet::iterator AttributeSet::end(unsigned Idx) {
+  if (!pImpl)
+    return ArrayRef<Attribute>().end();
+  return pImpl->begin(Idx);
+}
+
 //===----------------------------------------------------------------------===//
 // AttributeSet Introspection Methods
 //===----------------------------------------------------------------------===//
-- cgit v1.1 

From 3529d1aa8df3cfd9e37b1a4252cabc0f01652e94 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Fri, 1 Feb 2013 00:04:33 +0000
Subject: Fix another typo in the classof definitions that doesn't (currently)
 have any effect.

Spotted by Eli in review, thanks!!!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174121 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCAsmStreamer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 71be1a9..7eb7202 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -279,7 +279,7 @@ public:
   /// @}
 
   static bool classof(const MCStreamer *S) {
-    return S->getKind() == SK_NullStreamer;
+    return S->getKind() == SK_AsmStreamer;
   }
 };
 
-- cgit v1.1 

From fdd1eafe867734df285bbdb01cf1d21f63716798 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Fri, 1 Feb 2013 00:11:13 +0000
Subject: Rewrite instsimplify's handling of icmp on pointer values to remove
 the remaining use of AliasAnalysis concepts such as isIdentifiedObject to
 prove pointer inequality.

@external_compare in test/Transforms/InstSimplify/compare.ll shows a simple
case where a noalias argument can be equal to a global variable address, and
while AliasAnalysis can get away with saying that these pointers don't alias,
instsimplify cannot say that they are not equal.
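
The distinction the message draws, no-alias versus not-equal, can be reproduced in ordinary C++ by using the non-standard but widely supported __restrict qualifier as a rough analogue of LLVM's noalias parameter attribute; the names here are invented for the example. Restrict-style promises constrain how memory is accessed, not what the pointer values are, so a caller may legally pass the global's own address and make the comparison true:

    int g;  // stand-in for the global in @external_compare

    // "p does not alias g" is a promise about accesses, not addresses:
    // comparing the pointer values is fine, and the result can be true.
    bool pointsAtGlobal(int *__restrict p) { return p == &g; }

    int main() { return pointsAtGlobal(&g) ? 1 : 0; }  // returns 1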
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 144 +++++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 56 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index f8e76ca..2ca37cc 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -21,10 +21,10 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/Operator.h" @@ -667,8 +667,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. -static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, - Value *&V) { +static ConstantInt *stripAndComputeConstantOffsets(const DataLayout *TD, + Value *&V) { assert(V->getType()->isPointerTy()); // Without DataLayout, just be conservative for now. Theoretically, more could @@ -701,7 +701,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, } while (Visited.insert(V)); Type *IntPtrTy = TD->getIntPtrType(V->getContext()); - return ConstantInt::get(IntPtrTy, Offset); + return cast(ConstantInt::get(IntPtrTy, Offset)); } /// \brief Compute the constant difference between two pointer values. @@ -1689,8 +1689,19 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, } static Constant *computePointerICmp(const DataLayout *TD, + const TargetLibraryInfo *TLI, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { + // First, skip past any trivial no-ops. + LHS = LHS->stripPointerCasts(); + RHS = RHS->stripPointerCasts(); + + // A non-null pointer is not equal to a null pointer. + if (llvm::isKnownNonNull(LHS) && isa(RHS) && + (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: @@ -1713,15 +1724,80 @@ static Constant *computePointerICmp(const DataLayout *TD, break; } - Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + // Strip off any constant offsets so that we can reason about them. + // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets + // here and compare base addresses like AliasAnalysis does, however there are + // numerous hazards. AliasAnalysis and its utilities rely on special rules + // governing loads and stores which don't apply to icmps. Also, AliasAnalysis + // doesn't need to guarantee pointer inequality when it says NoAlias. + ConstantInt *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + ConstantInt *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + + // If LHS and RHS are related via constant offsets to the same base + // value, we can replace it with an icmp which just compares the offsets. 
+ if (LHS == RHS) + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + + // Various optimizations for (in)equality comparisons. + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + // Different non-empty allocations that exist at the same time have + // different addresses (if the program can tell). Global variables always + // exist, so they always exist during the lifetime of each other and all + // allocas. Two different allocas usually have different addresses... + // + // However, if there's an @llvm.stackrestore dynamically in between two + // allocas, they may have the same address. It's tempting to reduce the + // scope of the problem by only looking at *static* allocas here. That would + // cover the majority of allocas while significantly reducing the likelihood + // of having an @llvm.stackrestore pop up in the middle. However, it's not + // actually impossible for an @llvm.stackrestore to pop up in the middle of + // an entry block. Also, if we have a block that's not attached to a + // function, we can't tell if it's "static" under the current definition. + // Theoretically, this problem could be fixed by creating a new kind of + // instruction kind specifically for static allocas. Such a new instruction + // could be required to be at the top of the entry block, thus preventing it + // from being subject to a @llvm.stackrestore. Instcombine could even + // convert regular allocas into these special allocas. It'd be nifty. + // However, until then, this problem remains open. + // + // So, we'll assume that two non-empty allocas have different addresses + // for now. + // + // With all that, if the offsets are within the bounds of their allocations + // (and not one-past-the-end! so we can't use inbounds!), and their + // allocations aren't the same, the pointers are not equal. + // + // Note that it's not necessary to check for LHS being a global variable + // address, due to canonicalization and constant folding. + if (isa(LHS) && + (isa(RHS) || isa(RHS))) { + uint64_t LHSSize, RHSSize; + if (getObjectSize(LHS, LHSSize, TD, TLI) && + getObjectSize(RHS, RHSSize, TD, TLI)) { + const APInt &LHSOffsetValue = LHSOffset->getValue(); + const APInt &RHSOffsetValue = RHSOffset->getValue(); + if (!LHSOffsetValue.isNegative() && + !RHSOffsetValue.isNegative() && + LHSOffsetValue.ult(LHSSize) && + RHSOffsetValue.ult(RHSSize)) { + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } - // If LHS and RHS are not related via constant offsets to the same base - // value, there is nothing we can do here. - if (LHS != RHS) - return 0; + // Repeat the above check but this time without depending on DataLayout + // or being able to compute a precise size. + if (!cast(LHS->getType())->isEmptyTy() && + !cast(RHS->getType())->isEmptyTy() && + LHSOffset->isNullValue() && + RHSOffset->isNullValue()) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } - return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + // Otherwise, fail. + return 0; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -1786,50 +1862,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // icmp , - Different identified objects have - // different addresses (unless null), and what's more the address of an - // identified local is never equal to another argument (again, barring null). 
-      // Note that generalizing to the case where LHS is a global variable address
-      // or null is pointless, since if both LHS and RHS are constants then we
-      // already constant folded the compare, and if only one of them is then we
-      // moved it to RHS already.
-      Value *LHSPtr = LHS->stripPointerCasts();
-      Value *RHSPtr = RHS->stripPointerCasts();
-      if (LHSPtr == RHSPtr)
-        return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
-      // Be more aggressive about stripping pointer adjustments when checking a
-      // comparison of an alloca address to another object.  We can rip off all
-      // inbounds GEP operations, even if they are variable.
-      LHSPtr = LHSPtr->stripInBoundsOffsets();
-      if (llvm::isIdentifiedObject(LHSPtr)) {
-        RHSPtr = RHSPtr->stripInBoundsOffsets();
-        if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
-          // If both sides are different identified objects, they aren't equal
-          // unless they're null.
-          if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
-              Pred == CmpInst::ICMP_EQ)
-            return ConstantInt::get(ITy, false);
-
-          // A local identified object (alloca or noalias call) can't equal any
-          // incoming argument, unless they're both null or they belong to
-          // different functions. The latter happens during inlining.
-          if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr))
-            if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr))
-              if (LHSInst->getParent()->getParent() == RHSArg->getParent() &&
-                  Pred == CmpInst::ICMP_EQ)
-                return ConstantInt::get(ITy, false);
-        }
-
-        // Assume that the constant null is on the right.
-        if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
-          if (Pred == CmpInst::ICMP_EQ)
-            return ConstantInt::get(ITy, false);
-          else if (Pred == CmpInst::ICMP_NE)
-            return ConstantInt::get(ITy, true);
-        }
-      }
-
   // If we are comparing with zero then try hard since this is a common case.
   if (match(RHS, m_Zero())) {
     bool LHSKnownNonNegative, LHSKnownNegative;
@@ -2457,7 +2489,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
   if (LHS->getType()->isPointerTy())
-    if (Constant *C = computePointerICmp(Q.TD, Pred, LHS, RHS))
+    if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS))
       return C;
 
   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
-- cgit v1.1 

From 30d2c76800bc821aff6e224e0bd11d88a793303e Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Fri, 1 Feb 2013 00:13:50 +0000
Subject: Use iterators instead of relying upon a bitmask of attributes to
 remove attributes from an AttrBuilder.
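
A standalone sketch of the shape this change gives removal; the names and containers below are invented, not the in-tree AttrBuilder. Instead of decoding a 64-bit mask bit by bit, the builder iterates over the kinds recorded for a slot and erases each one, clearing any attached payload such as an alignment value as it goes:

    #include <set>
    #include <vector>

    enum AttrKind { Alignment, NoAlias, ZExt };  // hypothetical kinds

    struct Builder {
      std::set<AttrKind> Attrs;
      unsigned Align;
      Builder() : Align(0) {}

      // Iterator-driven removal: no bitmask arithmetic required.
      void removeAll(const std::vector<AttrKind> &SlotKinds) {
        for (std::vector<AttrKind>::const_iterator I = SlotKinds.begin(),
             E = SlotKinds.end(); I != E; ++I) {
          Attrs.erase(*I);
          if (*I == Alignment)
            Align = 0;  // the payload goes away with the kind
        }
      }
    };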
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 9d5f53b..01e0235 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -749,7 +749,7 @@ AttributeSet::iterator AttributeSet::begin(unsigned Idx) { AttributeSet::iterator AttributeSet::end(unsigned Idx) { if (!pImpl) return ArrayRef().end(); - return pImpl->begin(Idx); + return pImpl->end(Idx); } //===----------------------------------------------------------------------===// @@ -852,18 +852,24 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { - uint64_t Mask = A.Raw(Index); + unsigned Idx = ~0U; + for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) + if (A.getSlotIndex(I) == Index) { + Idx = I; + break; + } - for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; - I = Attribute::AttrKind(I + 1)) { - if (Mask & AttributeImpl::getAttrMask(I)) { - Attrs.erase(I); + assert(Idx != ~0U && "Couldn't find index in AttributeSet!"); - if (I == Attribute::Alignment) - Alignment = 0; - else if (I == Attribute::StackAlignment) - StackAlignment = 0; - } + for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) { + ConstantInt *CI = cast(I->getAttributeKind()); + Attribute::AttrKind Kind = Attribute::AttrKind(CI->getZExtValue()); + Attrs.erase(Kind); + + if (Kind == Attribute::Alignment) + Alignment = 0; + else if (Kind == Attribute::StackAlignment) + StackAlignment = 0; } return *this; -- cgit v1.1 From f715dbd263149efeb9c684dfdb0637cf84f94399 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 1 Feb 2013 00:48:14 +0000 Subject: Remove one of the odious 'Raw' methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 3 +-- lib/IR/Attributes.cpp | 13 ++----------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index e952578..2eb7f07 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -77,8 +77,7 @@ public: ID.AddPointer(Vals[I]); } - // FIXME: Remove these! - uint64_t Raw() const; + // FIXME: Remove this! static uint64_t getAttrMask(Attribute::AttrKind Val); }; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 01e0235..68b831d 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -223,10 +223,6 @@ bool Attribute::operator<(Attribute A) const { return *pImpl < *A.pImpl; } -uint64_t Attribute::Raw() const { - return pImpl ? pImpl->Raw() : 0; -} - //===----------------------------------------------------------------------===// // AttributeImpl Definition //===----------------------------------------------------------------------===// @@ -308,11 +304,6 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const { return ThisCDA->getAsString() < ThatCDA->getAsString(); } -uint64_t AttributeImpl::Raw() const { - // FIXME: Remove this. - return cast(Kind)->getZExtValue(); -} - uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { // FIXME: Remove this. 
switch (Val) { @@ -740,13 +731,13 @@ AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const { return 0; } -AttributeSet::iterator AttributeSet::begin(unsigned Idx) { +AttributeSet::iterator AttributeSet::begin(unsigned Idx) const { if (!pImpl) return ArrayRef().begin(); return pImpl->begin(Idx); } -AttributeSet::iterator AttributeSet::end(unsigned Idx) { +AttributeSet::iterator AttributeSet::end(unsigned Idx) const { if (!pImpl) return ArrayRef().end(); return pImpl->end(Idx); -- cgit v1.1 From 901261d558d0b41ba75d8aa2b38aac72aaa41bae Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 1 Feb 2013 00:49:06 +0000 Subject: Add a comment explaining an unavailable optimization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174131 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 2ca37cc..e296215 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1688,6 +1688,34 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } +// A significant optimization not implemented here is assuming that alloca +// addresses are not equal to incoming argument values. They don't *alias*, +// as we say, but that doesn't mean they aren't equal, so we take a +// conservative approach. +// +// This is inspired in part by C++11 5.10p1: +// "Two pointers of the same type compare equal if and only if they are both +// null, both point to the same function, or both represent the same +// address." +// +// This is pretty permissive. +// +// It's also partly due to C11 6.5.9p6: +// "Two pointers compare equal if and only if both are null pointers, both are +// pointers to the same object (including a pointer to an object and a +// subobject at its beginning) or function, both are pointers to one past the +// last element of the same array object, or one is a pointer to one past the +// end of one array object and the other is a pointer to the start of a +// different array object that happens to immediately follow the first array +// object in the address space.) +// +// C11's version is more restrictive, however there's no reason why an argument +// couldn't be a one-past-the-end value for a stack object in the caller and be +// equal to the beginning of a stack object in the callee. +// +// If the C and C++ standards are ever made sufficiently restrictive in this +// area, it may be possible to update LLVM's semantics accordingly and reinstate +// this optimization. static Constant *computePointerICmp(const DataLayout *TD, const TargetLibraryInfo *TLI, CmpInst::Predicate Pred, -- cgit v1.1 From 7beee2876795098d2e2f31ecc2ca29fa7640a8eb Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 1 Feb 2013 01:04:27 +0000 Subject: Remove some dead code, improve some asserts, and other assorted changes. No functionality change. 
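
Stepping back to the long pointer-comparison comment added in r174131 above: the one-past-the-end scenario it quotes from C11 is concrete enough to demonstrate. The sketch below is illustrative only; whether the two addresses actually coincide depends on how the compiler happens to lay out the objects:

    #include <cstdio>

    int main() {
      int a[4];
      int b;
      // a + 4 is a valid one-past-the-end pointer. The language allows it to
      // compare equal to the address of a different object, which is exactly
      // why instsimplify must not fold such a comparison to false.
      std::printf("%s\n", a + 4 == &b ? "equal" : "not equal");
      return 0;
    }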
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174132 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 9 ++------- lib/IR/Attributes.cpp | 33 +++++++++++---------------------- 2 files changed, 13 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 2eb7f07..3fbd723 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -38,14 +38,9 @@ class AttributeImpl : public FoldingSetNode { void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; public: - AttributeImpl(LLVMContext &C, Constant *Kind) - : Context(C), Kind(Kind) {} - AttributeImpl(LLVMContext &C, Constant *Kind, ArrayRef Vals) + AttributeImpl(LLVMContext &C, Constant *Kind, + ArrayRef Vals = ArrayRef()) : Context(C), Kind(Kind), Vals(Vals.begin(), Vals.end()) {} - explicit AttributeImpl(LLVMContext &C, Attribute::AttrKind data); - AttributeImpl(LLVMContext &C, Attribute::AttrKind data, - ArrayRef values); - AttributeImpl(LLVMContext &C, StringRef data); LLVMContext &getContext() { return Context; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 68b831d..412d83e 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -90,16 +90,16 @@ ArrayRef Attribute::getAttributeValues() const { /// This returns the alignment field of an attribute as a byte alignment value. unsigned Attribute::getAlignment() const { - if (!hasAttribute(Attribute::Alignment)) - return 0; + assert(hasAttribute(Attribute::Alignment) && + "Trying to get alignment from non-alignment attribute!"); return pImpl->getAlignment(); } /// This returns the stack alignment field of an attribute as a byte alignment /// value. unsigned Attribute::getStackAlignment() const { - if (!hasAttribute(Attribute::StackAlignment)) - return 0; + assert(hasAttribute(Attribute::StackAlignment) && + "Trying to get alignment from non-alignment attribute!"); return pImpl->getStackAlignment(); } @@ -204,6 +204,7 @@ std::string Attribute::getAsString() const { if (I != E) Result += ' '; } if (Vals.size() > 1) Result += ')'; + return Result; } llvm_unreachable("Unknown attribute"); @@ -227,22 +228,6 @@ bool Attribute::operator<(Attribute A) const { // AttributeImpl Definition //===----------------------------------------------------------------------===// -AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind kind) - : Context(C) { - Kind = ConstantInt::get(Type::getInt64Ty(C), kind); -} -AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind kind, - ArrayRef values) - : Context(C) { - Kind = ConstantInt::get(Type::getInt64Ty(C), kind); - Vals.reserve(values.size()); - Vals.append(values.begin(), values.end()); -} -AttributeImpl::AttributeImpl(LLVMContext &C, StringRef kind) - : Context(C) { - Kind = ConstantDataArray::getString(C, kind); -} - bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { if (ConstantInt *CI = dyn_cast(Kind)) return CI->getZExtValue() == A; @@ -282,6 +267,9 @@ bool AttributeImpl::operator!=(StringRef kind) const { } bool AttributeImpl::operator<(const AttributeImpl &AI) const { + // This sorts the attributes with Attribute::AttrKinds coming first (sorted + // relative to their enum value) and then strings. 
+ if (!Kind && !AI.Kind) return false; if (!Kind && AI.Kind) return true; if (Kind && !AI.Kind) return false; @@ -409,9 +397,9 @@ unsigned AttributeSetNode::getStackAlignment() const { std::string AttributeSetNode::getAsString() const { std::string Str = ""; for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) { - if (I != AttrList.begin()) Str += " "; + E = AttrList.end(); I != E; ) { Str += I->getAsString(); + if (++I != E) Str += " "; } return Str; } @@ -951,6 +939,7 @@ uint64_t AttrBuilder::Raw() const { // AttributeFuncs Function Defintions //===----------------------------------------------------------------------===// +/// \brief Which attributes cannot be applied to a type. AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { AttrBuilder Incompatible; -- cgit v1.1 From d5eb1cbee55b60dd7a5745f47c0b46a3a0b952e3 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 1 Feb 2013 06:45:40 +0000 Subject: Optimize shift lefts of a constant by a value plus constant into a single shift. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174152 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineShifts.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8cf76e5..f9e94f2 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -709,6 +709,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + // shl (c1 , add(y , c2)) -> (shl (shl(c1, c2)), y) + if (match(I.getOperand(0), m_Constant(C1)) && + match(I.getOperand(1), m_Add(m_Value(A), m_Constant(C2)))) { + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + } + return 0; } -- cgit v1.1 From 36146afb9676a81866acff882f76661891af3f3b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 1 Feb 2013 07:59:33 +0000 Subject: Revert r174152. The shift amount may overflow and in that case this transformation is illegal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineShifts.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index f9e94f2..8cf76e5 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -709,12 +709,6 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - // shl (c1 , add(y , c2)) -> (shl (shl(c1, c2)), y) - if (match(I.getOperand(0), m_Constant(C1)) && - match(I.getOperand(1), m_Add(m_Value(A), m_Constant(C2)))) { - return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - } - return 0; } -- cgit v1.1 From 84d3239889c576029382c3165495e0c78b14e9eb Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 1 Feb 2013 14:55:05 +0000 Subject: Remove currently unused register decoder from AArch64. This should fix a warning when building this backend. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174177 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index e98285b..a7fc352 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -79,8 +79,6 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -326,18 +324,6 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } - -static DecodeStatus -DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { -- cgit v1.1 From d9f32c20da9de8798284a8007662c6557560c876 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 1 Feb 2013 15:21:10 +0000 Subject: InstSimplify: stripAndComputeConstantOffsets can be called with vectors of pointers too. Prepare it for vectors of pointers and handle simple cases. We don't handle complicated cases because accumulateConstantOffset bails on pointer vectors. Fixes selfhost on i386. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index e296215..34ff64d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -667,9 +667,9 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. -static ConstantInt *stripAndComputeConstantOffsets(const DataLayout *TD, - Value *&V) { - assert(V->getType()->isPointerTy()); +static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, + Value *&V) { + assert(V->getType()->getScalarType()->isPointerTy()); // Without DataLayout, just be conservative for now. Theoretically, more could // be done in this case. 
@@ -697,11 +697,16 @@ static ConstantInt *stripAndComputeConstantOffsets(const DataLayout *TD, } else { break; } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + assert(V->getType()->getScalarType()->isPointerTy() && + "Unexpected operand type!"); } while (Visited.insert(V)); Type *IntPtrTy = TD->getIntPtrType(V->getContext()); - return cast(ConstantInt::get(IntPtrTy, Offset)); + Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + if (V->getType()->isVectorTy()) + return ConstantVector::getSplat(V->getType()->getVectorNumElements(), + OffsetIntPtr); + return OffsetIntPtr; } /// \brief Compute the constant difference between two pointer values. @@ -1758,8 +1763,8 @@ static Constant *computePointerICmp(const DataLayout *TD, // numerous hazards. AliasAnalysis and its utilities rely on special rules // governing loads and stores which don't apply to icmps. Also, AliasAnalysis // doesn't need to guarantee pointer inequality when it says NoAlias. - ConstantInt *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - ConstantInt *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); // If LHS and RHS are related via constant offsets to the same base // value, we can replace it with an icmp which just compares the offsets. @@ -1799,11 +1804,14 @@ static Constant *computePointerICmp(const DataLayout *TD, // address, due to canonicalization and constant folding. if (isa(LHS) && (isa(RHS) || isa(RHS))) { + ConstantInt *LHSOffsetCI = dyn_cast(LHSOffset); + ConstantInt *RHSOffsetCI = dyn_cast(RHSOffset); uint64_t LHSSize, RHSSize; - if (getObjectSize(LHS, LHSSize, TD, TLI) && + if (LHSOffsetCI && RHSOffsetCI && + getObjectSize(LHS, LHSSize, TD, TLI) && getObjectSize(RHS, RHSSize, TD, TLI)) { - const APInt &LHSOffsetValue = LHSOffset->getValue(); - const APInt &RHSOffsetValue = RHSOffset->getValue(); + const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); + const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); if (!LHSOffsetValue.isNegative() && !RHSOffsetValue.isNegative() && LHSOffsetValue.ult(LHSSize) && -- cgit v1.1 From 05f52eca949f4ea3d2d1a4f1c805d0e2658bc369 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Fri, 1 Feb 2013 15:54:43 +0000 Subject: Add appropriate TSFlags to the instructions that must be always extended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfoV4.td | 295 +++++++++++++++---------------- 1 file changed, 147 insertions(+), 148 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 2e389c6..ba7b749 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -299,101 +299,95 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), // These absolute set addressing mode instructions accept immediate as // an operand. We have duplicated these patterns to take global address. 
-let neverHasSideEffects = 1 in +let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in { def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memd($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memd($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memb(Re=#U6) -let neverHasSideEffects = 1 in def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memb($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memb($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memh(Re=#U6) -let neverHasSideEffects = 1 in def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memh($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memub(Re=#U6) -let neverHasSideEffects = 1 in def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memub($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memub($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memuh(Re=#U6) -let neverHasSideEffects = 1 in def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memuh($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memuh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memw(Re=#U6) -let neverHasSideEffects = 1 in def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memw($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memw($dst2=##$addr)", []>, Requires<[HasV4T]>; +} // Following patterns are defined for absolute set addressing mode // instruction which take global address as operand. -let neverHasSideEffects = 1 in +let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in { def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memd($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memb(Re=#U6) -let neverHasSideEffects = 1 in def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memb($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memh(Re=#U6) -let neverHasSideEffects = 1 in def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memub(Re=#U6) -let neverHasSideEffects = 1 in def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memub($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memuh(Re=#U6) -let neverHasSideEffects = 1 in def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memuh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memw(Re=#U6) -let neverHasSideEffects = 1 in def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memw($dst2=##$addr)", []>, Requires<[HasV4T]>; +} // Load doubleword. // @@ -1457,62 +1451,65 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), /// last operand. 
/// -// memd(Re=#U6)=Rtt +// memd(Re=#U)=Rtt +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, u6Imm:$src2), - "memd($dst1=#$src2) = $src1", + (ins DoubleRegs:$src1, u0AlwaysExt:$src2), + "memd($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memb(Re=#U6)=Rs +// memb(Re=#U)=Rs def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u6Imm:$src2), - "memb($dst1=#$src2) = $src1", + (ins IntRegs:$src1, u0AlwaysExt:$src2), + "memb($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memh(Re=#U6)=Rs +// memh(Re=#U)=Rs def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u6Imm:$src2), - "memh($dst1=#$src2) = $src1", + (ins IntRegs:$src1, u0AlwaysExt:$src2), + "memh($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memw(Re=#U6)=Rs +// memw(Re=#U)=Rs def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u6Imm:$src2), - "memw($dst1=#$src2) = $src1", + (ins IntRegs:$src1, u0AlwaysExt:$src2), + "memw($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; +} -// memd(Re=#U6)=Rtt +// memd(Re=#U)=Rtt +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, globaladdress:$src2), + (ins DoubleRegs:$src1, globaladdressExt:$src2), "memd($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memb(Re=#U6)=Rs +// memb(Re=#U)=Rs def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdress:$src2), + (ins IntRegs:$src1, globaladdressExt:$src2), "memb($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memh(Re=#U6)=Rs +// memh(Re=#U)=Rs def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdress:$src2), + (ins IntRegs:$src1, globaladdressExt:$src2), "memh($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memw(Re=#U6)=Rs +// memw(Re=#U)=Rs def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdress:$src2), + (ins IntRegs:$src1, globaladdressExt:$src2), "memw($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; - +} // multiclass for store instructions with base + register offset addressing // mode @@ -1632,13 +1629,14 @@ def : Pat<(store (i64 DoubleRegs:$src4), } // memd(Ru<<#u2+#U6)=Rtt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, +validSubTargets = HasV4SubT in def STrid_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4), "memd($src1<<#$src2+#$src3) = $src4", [(store (i64 DoubleRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memd(Rx++#s4:3)=Rtt @@ -1719,13 +1717,14 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), Requires<[HasV4T]>; // memb(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, +validSubTargets = HasV4SubT in def STrib_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memb($src1<<#$src2+#$src3) = $src4", [(truncstorei8 (i32 IntRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memb(Rx++#s4:0:circ(Mu))=Rt @@ -1749,13 +1748,14 
@@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // memh(Ru<<#u2+#U6)=Rt.H // memh(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, +validSubTargets = HasV4SubT in def STrih_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memh($src1<<#$src2+#$src3) = $src4", [(truncstorei16 (i32 IntRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memh(Rx++#s4:1:circ(Mu))=Rt.H @@ -1795,13 +1795,14 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), Requires<[HasV4T]>; // memw(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, +validSubTargets = HasV4SubT in def STriw_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memw($src1<<#$src2+#$src3) = $src4", [(store (i32 IntRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memw(Rx++#s4:2)=Rt @@ -2365,9 +2366,10 @@ mayStore = 1 in { } // memb(Ru<<#u2+#U6)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, +isNVStore = 1, validSubTargets = HasV4SubT in def STrib_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memb($src1<<#$src2+#$src3) = $src4.new", []>, Requires<[HasV4T]>; @@ -2447,9 +2449,10 @@ def STb_GP_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // memh(Ru<<#u2+#U6)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, +isNVStore = 1, validSubTargets = HasV4SubT in def STrih_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memh($src1<<#$src2+#$src3) = $src4.new", []>, Requires<[HasV4T]>; @@ -2476,9 +2479,10 @@ def STh_GP_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // memw(Ru<<#u2+#U6)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, +isNVStore = 1, validSubTargets = HasV4SubT in def STriw_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memw($src1<<#$src2+#$src3) = $src4.new", []>, Requires<[HasV4T]>; @@ -4460,172 +4464,167 @@ defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>; defm STriw_ind : ST_indirect_lo<"memw", store>; // Store - absolute addressing mode: These instruction take constant -// value as the extended operand +// value as the extended operand. 
multiclass ST_absimm { - let isPredicable = 1 in +let isExtended = 1, opExtendable = 0, isPredicable = 1, +validSubTargets = HasV4SubT in def _abs_V4 : STInst2<(outs), - (ins u6Imm:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(#$src1) = $src2"), + (ins u0AlwaysExt:$src1, IntRegs:$src2), + !strconcat(OpcStr, "(##$src1) = $src2"), []>, Requires<[HasV4T]>; - let isPredicated = 1 in +let isExtended = 1, opExtendable = 1, isPredicated = 1, +validSubTargets = HasV4SubT in { def _abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), - !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), + !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), - !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), + !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3")), + !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3")), + !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; +} - def _abs_nv_V4 : STInst2<(outs), - (ins u6Imm:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(#$src1) = $src2.new"), +let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1, +validSubTargets = HasV4SubT in + def _abs_nv_V4 : NVInst_V4<(outs), + (ins u0AlwaysExt:$src1, IntRegs:$src2), + !strconcat(OpcStr, "(##$src1) = $src2.new"), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), +let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1, +isNVStore = 1, validSubTargets = HasV4SubT in { + def _abs_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if ($src1)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + def _abs_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if (!$src1)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + def _abs_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, 
IntRegs:$src3), !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; } +} defm STrib_imm : ST_absimm<"memb">; defm STrih_imm : ST_absimm<"memh">; defm STriw_imm : ST_absimm<"memw">; -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2), - (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; - -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2), - (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2), - (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; +def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; +def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; +} // Load - absolute addressing mode: These instruction take constant // value as the extended operand multiclass LD_absimm { - let isPredicable = 1 in +let isExtended = 1, opExtendable = 1, isPredicable = 1, +validSubTargets = HasV4SubT in def _abs_V4 : LDInst2<(outs IntRegs:$dst), - (ins u6Imm:$src), + (ins u0AlwaysExt:$src), !strconcat("$dst = ", - !strconcat(OpcStr, "(#$src)")), + !strconcat(OpcStr, "(##$src)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in +let isExtended = 1, opExtendable = 2, isPredicated = 1, +validSubTargets = HasV4SubT in { def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if ($src1) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if (!$src1) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if ($src1.new) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if (!$src1.new) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; } +} -defm LDrib_imm : LD_absimm<"memb">; +defm LDrib_imm : LD_absimm<"memb">; defm LDriub_imm : LD_absimm<"memub">; -defm LDrih_imm : LD_absimm<"memh">; +defm LDrih_imm : LD_absimm<"memh">; defm LDriuh_imm : LD_absimm<"memuh">; -defm LDriw_imm : LD_absimm<"memw">; +defm LDriw_imm : LD_absimm<"memw">; -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(i32 (load u6ImmPred:$src)), - (LDriw_imm_abs_V4 u6ImmPred:$src)>; +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(i32 (load u0AlwaysExtPred:$src)), + (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>; -let Predicates = [HasV4T], AddedComplexity=30 in -def : 
Pat<(i32 (sextloadi8 u6ImmPred:$src)), - (LDrib_imm_abs_V4 u6ImmPred:$src)>; +def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), + (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>; -let Predicates = [HasV4T], AddedComplexity=30 in -def : Pat<(i32 (zextloadi8 u6ImmPred:$src)), - (LDriub_imm_abs_V4 u6ImmPred:$src)>; +def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), + (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>; -let Predicates = [HasV4T], AddedComplexity=30 in -def : Pat<(i32 (sextloadi16 u6ImmPred:$src)), - (LDrih_imm_abs_V4 u6ImmPred:$src)>; - -let Predicates = [HasV4T], AddedComplexity=30 in -def : Pat<(i32 (zextloadi16 u6ImmPred:$src)), - (LDriuh_imm_abs_V4 u6ImmPred:$src)>; +def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), + (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>; +def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), + (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>; +} // Indexed store double word - global address. // memw(Rs+#u6:2)=#S8 -- cgit v1.1 From 924223c9aba54736c86bed74ffe0ceb01467a23d Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Fri, 1 Feb 2013 16:36:16 +0000 Subject: Replace LDriu*[bhdw]_indexed_V4 instructions with "def Pats". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174193 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.cpp | 72 ------ lib/Target/Hexagon/HexagonInstrInfoV4.td | 357 +++------------------------ lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 108 -------- 3 files changed, 33 insertions(+), 504 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index ab35982..2cb77dd 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1500,26 +1500,11 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::JMPR_cPt; // V4 indexed+scaled load. 
- case Hexagon::LDrid_indexed_cPt_V4: - return Hexagon::LDrid_indexed_cNotPt_V4; - case Hexagon::LDrid_indexed_cNotPt_V4: - return Hexagon::LDrid_indexed_cPt_V4; - case Hexagon::LDrid_indexed_shl_cPt_V4: return Hexagon::LDrid_indexed_shl_cNotPt_V4; case Hexagon::LDrid_indexed_shl_cNotPt_V4: return Hexagon::LDrid_indexed_shl_cPt_V4; - case Hexagon::LDrib_indexed_cPt_V4: - return Hexagon::LDrib_indexed_cNotPt_V4; - case Hexagon::LDrib_indexed_cNotPt_V4: - return Hexagon::LDrib_indexed_cPt_V4; - - case Hexagon::LDriub_indexed_cPt_V4: - return Hexagon::LDriub_indexed_cNotPt_V4; - case Hexagon::LDriub_indexed_cNotPt_V4: - return Hexagon::LDriub_indexed_cPt_V4; - case Hexagon::LDrib_indexed_shl_cPt_V4: return Hexagon::LDrib_indexed_shl_cNotPt_V4; case Hexagon::LDrib_indexed_shl_cNotPt_V4: @@ -1530,16 +1515,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::LDriub_indexed_shl_cNotPt_V4: return Hexagon::LDriub_indexed_shl_cPt_V4; - case Hexagon::LDrih_indexed_cPt_V4: - return Hexagon::LDrih_indexed_cNotPt_V4; - case Hexagon::LDrih_indexed_cNotPt_V4: - return Hexagon::LDrih_indexed_cPt_V4; - - case Hexagon::LDriuh_indexed_cPt_V4: - return Hexagon::LDriuh_indexed_cNotPt_V4; - case Hexagon::LDriuh_indexed_cNotPt_V4: - return Hexagon::LDriuh_indexed_cPt_V4; - case Hexagon::LDrih_indexed_shl_cPt_V4: return Hexagon::LDrih_indexed_shl_cNotPt_V4; case Hexagon::LDrih_indexed_shl_cNotPt_V4: @@ -1550,11 +1525,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::LDriuh_indexed_shl_cNotPt_V4: return Hexagon::LDriuh_indexed_shl_cPt_V4; - case Hexagon::LDriw_indexed_cPt_V4: - return Hexagon::LDriw_indexed_cNotPt_V4; - case Hexagon::LDriw_indexed_cNotPt_V4: - return Hexagon::LDriw_indexed_cPt_V4; - case Hexagon::LDriw_indexed_shl_cPt_V4: return Hexagon::LDriw_indexed_shl_cNotPt_V4; case Hexagon::LDriw_indexed_shl_cNotPt_V4: @@ -1965,51 +1935,21 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::JMPR_cNotPt; // V4 indexed+scaled load. - case Hexagon::LDrid_indexed_V4: - return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 : - Hexagon::LDrid_indexed_cNotPt_V4; case Hexagon::LDrid_indexed_shl_V4: return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 : Hexagon::LDrid_indexed_shl_cNotPt_V4; - case Hexagon::LDrib_indexed_V4: - return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 : - Hexagon::LDrib_indexed_cNotPt_V4; - case Hexagon::LDriub_indexed_V4: - return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 : - Hexagon::LDriub_indexed_cNotPt_V4; - case Hexagon::LDriub_ae_indexed_V4: - return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 : - Hexagon::LDriub_indexed_cNotPt_V4; case Hexagon::LDrib_indexed_shl_V4: return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 : Hexagon::LDrib_indexed_shl_cNotPt_V4; case Hexagon::LDriub_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 : Hexagon::LDriub_indexed_shl_cNotPt_V4; - case Hexagon::LDriub_ae_indexed_shl_V4: - return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 : - Hexagon::LDriub_indexed_shl_cNotPt_V4; - case Hexagon::LDrih_indexed_V4: - return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 : - Hexagon::LDrih_indexed_cNotPt_V4; - case Hexagon::LDriuh_indexed_V4: - return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 : - Hexagon::LDriuh_indexed_cNotPt_V4; - case Hexagon::LDriuh_ae_indexed_V4: - return !invertPredicate ? 
Hexagon::LDriuh_indexed_cPt_V4 : - Hexagon::LDriuh_indexed_cNotPt_V4; case Hexagon::LDrih_indexed_shl_V4: return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 : Hexagon::LDrih_indexed_shl_cNotPt_V4; case Hexagon::LDriuh_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : Hexagon::LDriuh_indexed_shl_cNotPt_V4; - case Hexagon::LDriuh_ae_indexed_shl_V4: - return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : - Hexagon::LDriuh_indexed_shl_cNotPt_V4; - case Hexagon::LDriw_indexed_V4: - return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 : - Hexagon::LDriw_indexed_cNotPt_V4; case Hexagon::LDriw_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 : Hexagon::LDriw_indexed_shl_cNotPt_V4; @@ -2647,28 +2587,16 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::POST_LDriub_cPt : case Hexagon::POST_LDriub_cNotPt : return QRI.Subtarget.hasV4TOps(); - case Hexagon::LDrid_indexed_cPt_V4 : - case Hexagon::LDrid_indexed_cNotPt_V4 : case Hexagon::LDrid_indexed_shl_cPt_V4 : case Hexagon::LDrid_indexed_shl_cNotPt_V4 : - case Hexagon::LDrib_indexed_cPt_V4 : - case Hexagon::LDrib_indexed_cNotPt_V4 : case Hexagon::LDrib_indexed_shl_cPt_V4 : case Hexagon::LDrib_indexed_shl_cNotPt_V4 : - case Hexagon::LDriub_indexed_cPt_V4 : - case Hexagon::LDriub_indexed_cNotPt_V4 : case Hexagon::LDriub_indexed_shl_cPt_V4 : case Hexagon::LDriub_indexed_shl_cNotPt_V4 : - case Hexagon::LDrih_indexed_cPt_V4 : - case Hexagon::LDrih_indexed_cNotPt_V4 : case Hexagon::LDrih_indexed_shl_cPt_V4 : case Hexagon::LDrih_indexed_shl_cNotPt_V4 : - case Hexagon::LDriuh_indexed_cPt_V4 : - case Hexagon::LDriuh_indexed_cNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cPt_V4 : case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : - case Hexagon::LDriw_indexed_cPt_V4 : - case Hexagon::LDriw_indexed_cNotPt_V4 : case Hexagon::LDriw_indexed_shl_cPt_V4 : case Hexagon::LDriw_indexed_shl_cNotPt_V4 : return QRI.Subtarget.hasV4TOps(); diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index ba7b749..617eef4 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -389,25 +389,6 @@ def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), Requires<[HasV4T]>; } -// Load doubleword. -// -// Make sure that in post increment load, the first operand is always the post -// increment operand. -// -// Rdd=memd(Rs+Rt<<#u2) -// Special case pattern for indexed load without offset which is easier to -// match. AddedComplexity of this pattern should be lower than base+offset load -// and lower yet than the more generic version with offset/shift below -// Similar approach is taken for all other base+index loads. -let AddedComplexity = 10, isPredicable = 1 in -def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memd($src1+$src2<<#0)", - [(set (i64 DoubleRegs:$dst), - (i64 (load (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - // multiclass for load instructions with base + register offset // addressing mode multiclass ld_idxd_shl_pbase; } -//// Load doubleword conditionally. 
-// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2) -// if (Pv) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// Rdd=memd(Rt<<#u2+#U6) - -//// Load byte. -// Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 10, isPredicable = 1 in -def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memb($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (sextloadi8 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memub($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (zextloadi8 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memub($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi8 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 40, isPredicable = 1 in -def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), - "$dst=memub($src1+$src2<<#$offset)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi8 (add (i32 IntRegs:$src1), - (shl (i32 IntRegs:$src2), - u2ImmPred:$offset)))))]>, - Requires<[HasV4T]>; - -//// Load byte conditionally. 
-// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2) -// if (Pv) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -//// Load unsigned byte conditionally. -// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2) -// if (Pv) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// Rd=memb(Rt<<#u2+#U6) - -//// Load halfword -// Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 10, isPredicable = 1 in -def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memh($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (sextloadi16 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memuh($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (zextloadi16 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memuh($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi16 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 40, isPredicable = 1 in -def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), - "$dst=memuh($src1+$src2<<#$offset)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi16 (add (i32 IntRegs:$src1), - (shl 
(i32 IntRegs:$src2), - u2ImmPred:$offset)))))]>, - Requires<[HasV4T]>; - -//// Load halfword conditionally. -// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2) -// if (Pv) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -//// Load unsigned halfword conditionally. -// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2) -// if (Pv) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// Rd=memh(Rt<<#u2+#U6) - -//// Load word. -// Load predicate: Fix for bug 5279. -let neverHasSideEffects = 1 in -def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst), - (ins MEMri:$addr), - "Error; should not emit", - []>, + +// 'def pats' for load instruction base + register offset and +// zero immediate value. 
+let AddedComplexity = 10 in { +def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))), + (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -// Rd=memw(Re=#U6) +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; -// Rd=memw(Rs+Rt<<#u2) -let AddedComplexity = 10, isPredicable = 1 in -def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memw($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (load (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -//// Load word conditionally. -// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2) -// if (Pv) Rd=memw(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; +def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; + +def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), + (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, + Requires<[HasV4T]>; +} /// Load from global offset diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 409a243..2ab6dee 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -1092,72 +1092,36 @@ static int GetDotNewPredOp(const int opc) { // V4 indexed+scaled load - case Hexagon::LDrid_indexed_cPt_V4 : - return Hexagon::LDrid_indexed_cdnPt_V4; - - case Hexagon::LDrid_indexed_cNotPt_V4 : - return Hexagon::LDrid_indexed_cdnNotPt_V4; - case Hexagon::LDrid_indexed_shl_cPt_V4 : return Hexagon::LDrid_indexed_shl_cdnPt_V4; case Hexagon::LDrid_indexed_shl_cNotPt_V4 : return Hexagon::LDrid_indexed_shl_cdnNotPt_V4; - case Hexagon::LDrib_indexed_cPt_V4 : - return Hexagon::LDrib_indexed_cdnPt_V4; - - case Hexagon::LDrib_indexed_cNotPt_V4 : - return 
Hexagon::LDrib_indexed_cdnNotPt_V4; - case Hexagon::LDrib_indexed_shl_cPt_V4 : return Hexagon::LDrib_indexed_shl_cdnPt_V4; case Hexagon::LDrib_indexed_shl_cNotPt_V4 : return Hexagon::LDrib_indexed_shl_cdnNotPt_V4; - case Hexagon::LDriub_indexed_cPt_V4 : - return Hexagon::LDriub_indexed_cdnPt_V4; - - case Hexagon::LDriub_indexed_cNotPt_V4 : - return Hexagon::LDriub_indexed_cdnNotPt_V4; - case Hexagon::LDriub_indexed_shl_cPt_V4 : return Hexagon::LDriub_indexed_shl_cdnPt_V4; case Hexagon::LDriub_indexed_shl_cNotPt_V4 : return Hexagon::LDriub_indexed_shl_cdnNotPt_V4; - case Hexagon::LDrih_indexed_cPt_V4 : - return Hexagon::LDrih_indexed_cdnPt_V4; - - case Hexagon::LDrih_indexed_cNotPt_V4 : - return Hexagon::LDrih_indexed_cdnNotPt_V4; - case Hexagon::LDrih_indexed_shl_cPt_V4 : return Hexagon::LDrih_indexed_shl_cdnPt_V4; case Hexagon::LDrih_indexed_shl_cNotPt_V4 : return Hexagon::LDrih_indexed_shl_cdnNotPt_V4; - case Hexagon::LDriuh_indexed_cPt_V4 : - return Hexagon::LDriuh_indexed_cdnPt_V4; - - case Hexagon::LDriuh_indexed_cNotPt_V4 : - return Hexagon::LDriuh_indexed_cdnNotPt_V4; - case Hexagon::LDriuh_indexed_shl_cPt_V4 : return Hexagon::LDriuh_indexed_shl_cdnPt_V4; case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4; - case Hexagon::LDriw_indexed_cPt_V4 : - return Hexagon::LDriw_indexed_cdnPt_V4; - - case Hexagon::LDriw_indexed_cNotPt_V4 : - return Hexagon::LDriw_indexed_cdnNotPt_V4; - case Hexagon::LDriw_indexed_shl_cPt_V4 : return Hexagon::LDriw_indexed_shl_cdnPt_V4; @@ -1623,72 +1587,36 @@ static int GetDotOldOp(const int opc) { // V4 indexed+scaled Load - case Hexagon::LDrid_indexed_cdnPt_V4 : - return Hexagon::LDrid_indexed_cPt_V4; - - case Hexagon::LDrid_indexed_cdnNotPt_V4 : - return Hexagon::LDrid_indexed_cNotPt_V4; - case Hexagon::LDrid_indexed_shl_cdnPt_V4 : return Hexagon::LDrid_indexed_shl_cPt_V4; case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : return Hexagon::LDrid_indexed_shl_cNotPt_V4; - case Hexagon::LDrib_indexed_cdnPt_V4 : - return Hexagon::LDrib_indexed_cPt_V4; - - case Hexagon::LDrib_indexed_cdnNotPt_V4 : - return Hexagon::LDrib_indexed_cNotPt_V4; - case Hexagon::LDrib_indexed_shl_cdnPt_V4 : return Hexagon::LDrib_indexed_shl_cPt_V4; case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : return Hexagon::LDrib_indexed_shl_cNotPt_V4; - case Hexagon::LDriub_indexed_cdnPt_V4 : - return Hexagon::LDriub_indexed_cPt_V4; - - case Hexagon::LDriub_indexed_cdnNotPt_V4 : - return Hexagon::LDriub_indexed_cNotPt_V4; - case Hexagon::LDriub_indexed_shl_cdnPt_V4 : return Hexagon::LDriub_indexed_shl_cPt_V4; case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : return Hexagon::LDriub_indexed_shl_cNotPt_V4; - case Hexagon::LDrih_indexed_cdnPt_V4 : - return Hexagon::LDrih_indexed_cPt_V4; - - case Hexagon::LDrih_indexed_cdnNotPt_V4 : - return Hexagon::LDrih_indexed_cNotPt_V4; - case Hexagon::LDrih_indexed_shl_cdnPt_V4 : return Hexagon::LDrih_indexed_shl_cPt_V4; case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : return Hexagon::LDrih_indexed_shl_cNotPt_V4; - case Hexagon::LDriuh_indexed_cdnPt_V4 : - return Hexagon::LDriuh_indexed_cPt_V4; - - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : - return Hexagon::LDriuh_indexed_cNotPt_V4; - case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : return Hexagon::LDriuh_indexed_shl_cPt_V4; case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : return Hexagon::LDriuh_indexed_shl_cNotPt_V4; - case Hexagon::LDriw_indexed_cdnPt_V4 : - return Hexagon::LDriw_indexed_cPt_V4; - - case Hexagon::LDriw_indexed_cdnNotPt_V4 : - return Hexagon::LDriw_indexed_cNotPt_V4; - case 
Hexagon::LDriw_indexed_shl_cdnPt_V4 : return Hexagon::LDriw_indexed_shl_cPt_V4; @@ -2249,28 +2177,16 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::LDriub_indexed_cdnPt : case Hexagon::POST_LDriub_cPt : case Hexagon::POST_LDriub_cdnPt_V4 : - case Hexagon::LDrid_indexed_cPt_V4 : - case Hexagon::LDrid_indexed_cdnPt_V4 : case Hexagon::LDrid_indexed_shl_cPt_V4 : case Hexagon::LDrid_indexed_shl_cdnPt_V4 : - case Hexagon::LDrib_indexed_cPt_V4 : - case Hexagon::LDrib_indexed_cdnPt_V4 : case Hexagon::LDrib_indexed_shl_cPt_V4 : case Hexagon::LDrib_indexed_shl_cdnPt_V4 : - case Hexagon::LDriub_indexed_cPt_V4 : - case Hexagon::LDriub_indexed_cdnPt_V4 : case Hexagon::LDriub_indexed_shl_cPt_V4 : case Hexagon::LDriub_indexed_shl_cdnPt_V4 : - case Hexagon::LDrih_indexed_cPt_V4 : - case Hexagon::LDrih_indexed_cdnPt_V4 : case Hexagon::LDrih_indexed_shl_cPt_V4 : case Hexagon::LDrih_indexed_shl_cdnPt_V4 : - case Hexagon::LDriuh_indexed_cPt_V4 : - case Hexagon::LDriuh_indexed_cdnPt_V4 : case Hexagon::LDriuh_indexed_shl_cPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : - case Hexagon::LDriw_indexed_cPt_V4 : - case Hexagon::LDriw_indexed_cdnPt_V4 : case Hexagon::LDriw_indexed_shl_cPt_V4 : case Hexagon::LDriw_indexed_shl_cdnPt_V4 : case Hexagon::ADD_ri_cPt : @@ -2420,28 +2336,16 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::LDriub_indexed_cdnNotPt : case Hexagon::POST_LDriub_cNotPt : case Hexagon::POST_LDriub_cdnNotPt_V4 : - case Hexagon::LDrid_indexed_cNotPt_V4 : - case Hexagon::LDrid_indexed_cdnNotPt_V4 : case Hexagon::LDrid_indexed_shl_cNotPt_V4 : case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_cNotPt_V4 : - case Hexagon::LDrib_indexed_cdnNotPt_V4 : case Hexagon::LDrib_indexed_shl_cNotPt_V4 : case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_cNotPt_V4 : - case Hexagon::LDriub_indexed_cdnNotPt_V4 : case Hexagon::LDriub_indexed_shl_cNotPt_V4 : case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_cNotPt_V4 : - case Hexagon::LDrih_indexed_cdnNotPt_V4 : case Hexagon::LDrih_indexed_shl_cNotPt_V4 : case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_cNotPt_V4 : - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_cNotPt_V4 : - case Hexagon::LDriw_indexed_cdnNotPt_V4 : case Hexagon::LDriw_indexed_shl_cNotPt_V4 : case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : case Hexagon::ADD_ri_cNotPt : @@ -2563,28 +2467,16 @@ bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { case Hexagon::POST_LDriub_cdnPt_V4 : case Hexagon::POST_LDriub_cdnNotPt_V4 : - case Hexagon::LDrid_indexed_cdnPt_V4 : - case Hexagon::LDrid_indexed_cdnNotPt_V4 : case Hexagon::LDrid_indexed_shl_cdnPt_V4 : case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_cdnPt_V4 : - case Hexagon::LDrib_indexed_cdnNotPt_V4 : case Hexagon::LDrib_indexed_shl_cdnPt_V4 : case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_cdnPt_V4 : - case Hexagon::LDriub_indexed_cdnNotPt_V4 : case Hexagon::LDriub_indexed_shl_cdnPt_V4 : case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_cdnPt_V4 : - case Hexagon::LDrih_indexed_cdnNotPt_V4 : case Hexagon::LDrih_indexed_shl_cdnPt_V4 : case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_cdnPt_V4 : - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : 
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_cdnPt_V4 : - case Hexagon::LDriw_indexed_cdnNotPt_V4 : case Hexagon::LDriw_indexed_shl_cdnPt_V4 : case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : -- cgit v1.1 From 5fb7947ed16a57a9c0b71ac70905454d82d25696 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 1 Feb 2013 17:49:07 +0000 Subject: Use a continue to simplify loop and reduce indentation. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174198 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PrologEpilogInserter.cpp | 49 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index bca3ede3..954613d 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -752,32 +752,33 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).isFI()) { - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + if (!MI->getOperand(i).isFI()) + continue; - MI = 0; - break; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; } + + MI = 0; + break; } if (DoIncr && I != BB->end()) ++I; -- cgit v1.1 From 9f285c6c31019f9db237932a56eff95d307fce06 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 1 Feb 2013 18:57:06 +0000 Subject: Fix misplaced 'break'. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174205 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/Interpreter/Execution.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index b9a3fc7..ec4f7f6 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1171,10 +1171,10 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { switch (Ty->getTypeID()) { case Type::IntegerTyID: Dest.IntVal = Src.IntVal; - IMPLEMENT_VAARG(Pointer); - IMPLEMENT_VAARG(Float); - IMPLEMENT_VAARG(Double); break; + IMPLEMENT_VAARG(Pointer); + IMPLEMENT_VAARG(Float); + IMPLEMENT_VAARG(Double); default: dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; llvm_unreachable(0); -- cgit v1.1 From 5882e566a560691bef3eb817f6390ee93242b6a6 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 1 Feb 2013 19:25:23 +0000 Subject: Fix errant fallthrough in the generation of the lifetime markers. Found by Alexander Kornienko. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174207 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 393a6be..9bd6ae6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5170,6 +5170,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); DAG.setRoot(Res); } + return 0; } case Intrinsic::invariant_start: // Discard region information. -- cgit v1.1 From 693c37aa86506be657dfaf8835845b0998531c3c Mon Sep 17 00:00:00 2001 From: David Sehr Date: Fri, 1 Feb 2013 19:28:09 +0000 Subject: Two changes relevant to LEA and x32: 1) allows the use of RIP-relative addressing in 32-bit LEA instructions under x86-64 (ILP32 and LP64) 2) separates the size of address registers in 64-bit LEA instructions from control by ILP32/LP64. 
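A sketch of what (1) enables, with a hypothetical global (the exact lowering depends on the code model and relocation choices):

    extern int sym;       // hypothetical symbol
    int *addr_of_sym() {  // under ILP32 (x32) this can now lower to a 32-bit
      return &sym;        // LEA with a RIP-relative source:
    }                     //   leal sym(%rip), %eax

For (2), the new lea64mem operand class keeps the base and index operands GR64 regardless of ILP32/LP64, since a 64-bit LEA always computes its address with 64-bit registers.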
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174208 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 4 ++-- lib/Target/X86/X86InstrInfo.td | 13 +++++++++++++ lib/Target/X86/X86MCInstLower.cpp | 3 ++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 0eecd5f..7db611c 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -29,11 +29,11 @@ def LEA32r : I<0x8D, MRMSrcMem, def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>, Requires<[In64BitMode]>; let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9ecf5e2..0d32506 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -525,6 +525,13 @@ def lea64_32mem : Operand { let ParserMatchClass = X86MemAsmOperand; } +// Memory operands that use 64-bit pointers in both ILP32 and LP64. +def lea64mem : Operand { + let PrintMethod = "printi64mem"; + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. @@ -535,6 +542,12 @@ def addr : ComplexPattern; def lea32addr : ComplexPattern; +// In 64-bit mode 32-bit LEAs can use RIP-relative addressing. +def lea64_32addr : ComplexPattern; + def tls32addr : ComplexPattern; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 5a1e1b8..3af1b3e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -239,7 +239,8 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { if (!MI->getOperand(OpNo+i).isReg()) continue; unsigned Reg = MI->getOperand(OpNo+i).getReg(); - if (Reg == 0) continue; + // LEAs can use RIP-relative addressing, and RIP has no sub/super register. + if (Reg == 0 || Reg == X86::RIP) continue; MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } -- cgit v1.1 From 83474ee594b5b14e9071564814a90571805cc433 Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Fri, 1 Feb 2013 20:41:27 +0000 Subject: This patch aims to improve compile time performance by increasing the SCEV vector size in LoopStrengthReduce. It is observed that the BaseRegs vector size is 4 in most cases, and elements are frequently copied when it is initialized as SmallVector BaseRegs. Our benchmark results show that the compilation time performance improved by ~0.5%. Patch by Wan Xiaofei. 
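The mechanism, in brief: SmallVector<T, N> keeps up to N elements inline and only heap-allocates beyond that, so growing Formula's member from SmallVector<const SCEV *, 2> to SmallVector<const SCEV *, 4> makes the common four-register case allocation- and copy-free, at the cost of a larger Formula object. A standalone sketch of the tradeoff (illustrative element type, not the LSR code itself):

    #include "llvm/ADT/SmallVector.h"
    struct FormulaSketch {
      // Four inline slots: the typical formula never touches the heap.
      llvm::SmallVector<const void *, 4> BaseRegs;
    };

The ~0.5% compile-time improvement suggests the avoided allocations and element copies outweigh the extra inline footprint.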
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 87e3447..9237077 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -237,7 +237,7 @@ struct Formula { /// BaseRegs - The list of "base" registers for this use. When this is /// non-empty, - SmallVector BaseRegs; + SmallVector BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null /// when Scale is not zero. @@ -1087,19 +1087,19 @@ namespace { /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. struct UniquifierDenseMapInfo { - static SmallVector getEmptyKey() { - SmallVector V; + static SmallVector getEmptyKey() { + SmallVector V; V.push_back(reinterpret_cast(-1)); return V; } - static SmallVector getTombstoneKey() { - SmallVector V; + static SmallVector getTombstoneKey() { + SmallVector V; V.push_back(reinterpret_cast(-2)); return V; } - static unsigned getHashValue(const SmallVector &V) { + static unsigned getHashValue(const SmallVector &V) { unsigned Result = 0; for (SmallVectorImpl::const_iterator I = V.begin(), E = V.end(); I != E; ++I) @@ -1107,8 +1107,8 @@ struct UniquifierDenseMapInfo { return Result; } - static bool isEqual(const SmallVector &LHS, - const SmallVector &RHS) { + static bool isEqual(const SmallVector &LHS, + const SmallVector &RHS) { return LHS == RHS; } }; @@ -1119,7 +1119,7 @@ struct UniquifierDenseMapInfo { /// the user itself, and information about how the use may be satisfied. /// TODO: Represent multiple users of the same expression in common? class LSRUse { - DenseSet, UniquifierDenseMapInfo> Uniquifier; + DenseSet, UniquifierDenseMapInfo> Uniquifier; public: /// KindType - An enum for a kind of use, indicating what types of @@ -1178,7 +1178,7 @@ public: /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { - SmallVector Key = F.BaseRegs; + SmallVector Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1188,7 +1188,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { - SmallVector Key = F.BaseRegs; + SmallVector Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -3656,7 +3656,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // Collect the best formula for each unique set of shared registers. This // is reset for each use. 
- typedef DenseMap, size_t, UniquifierDenseMapInfo> + typedef DenseMap, size_t, UniquifierDenseMapInfo> BestFormulaeTy; BestFormulaeTy BestFormulae; @@ -3691,7 +3691,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { dbgs() << "\n"); } else { - SmallVector Key; + SmallVector Key; for (SmallVectorImpl::const_iterator J = F.BaseRegs.begin(), JE = F.BaseRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; -- cgit v1.1 From 5a4041e7282ca1dba93fe1a97c8260c0ef621f5d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 1 Feb 2013 22:32:30 +0000 Subject: Change the AttributeImpl to hold a single Constant* for the values. This Constant could be an aggregate to represent multiple values. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 22 ++++++++++------------ lib/IR/Attributes.cpp | 28 +++++++++++++--------------- 2 files changed, 23 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 3fbd723..66001f7 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -30,24 +30,23 @@ class LLVMContext; /// \brief This class represents a single, uniqued attribute. That attribute /// could be a single enum, a tuple, or a string. class AttributeImpl : public FoldingSetNode { - LLVMContext &Context; - Constant *Kind; - SmallVector Vals; + LLVMContext &Context; ///< Global context for uniquing objects + Constant *Kind; ///< Kind of attribute: enum or string + Constant *Values; ///< Values associated with the attribute // AttributesImpl is uniqued, these should not be publicly available. void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; public: - AttributeImpl(LLVMContext &C, Constant *Kind, - ArrayRef Vals = ArrayRef()) - : Context(C), Kind(Kind), Vals(Vals.begin(), Vals.end()) {} + AttributeImpl(LLVMContext &C, Constant *Kind, Constant *Values = 0) + : Context(C), Kind(Kind), Values(Values) {} LLVMContext &getContext() { return Context; } bool hasAttribute(Attribute::AttrKind A) const; Constant *getAttributeKind() const { return Kind; } - ArrayRef getAttributeValues() const { return Vals; } + Constant *getAttributeValues() const { return Values; } uint64_t getAlignment() const; uint64_t getStackAlignment() const; @@ -63,13 +62,12 @@ public: bool operator<(const AttributeImpl &AI) const; void Profile(FoldingSetNodeID &ID) const { - Profile(ID, Kind, Vals); + Profile(ID, Kind, Values); } - static void Profile(FoldingSetNodeID &ID, Constant *Kind, - ArrayRef Vals) { + static void Profile(FoldingSetNodeID &ID, Constant *Kind, Constant *Values) { ID.AddPointer(Kind); - for (unsigned I = 0, E = Vals.size(); I != E; ++I) - ID.AddPointer(Vals[I]); + if (Values) + ID.AddPointer(Values); } // FIXME: Remove this! diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 412d83e..f8ca9f1 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -84,8 +84,8 @@ Constant *Attribute::getAttributeKind() const { return pImpl ? pImpl->getAttributeKind() : 0; } -ArrayRef Attribute::getAttributeValues() const { - return pImpl ? pImpl->getAttributeValues() : ArrayRef(); +Constant *Attribute::getAttributeValues() const { + return pImpl ? pImpl->getAttributeValues() : 0; } /// This returns the alignment field of an attribute as a byte alignment value. 
@@ -186,24 +186,22 @@ std::string Attribute::getAsString() const { // // "kind" // "kind" = "value" - // "kind" = ("value1" "value2" "value3" ) + // "kind" = ( "value1" "value2" "value3" ) // if (ConstantDataArray *CDA = dyn_cast(pImpl->getAttributeKind())) { std::string Result; Result += '\"' + CDA->getAsString().str() + '"'; - ArrayRef Vals = pImpl->getAttributeValues(); - if (Vals.empty()) return Result; + Constant *Vals = pImpl->getAttributeValues(); + if (!Vals) return Result; + + // FIXME: This should support more than just ConstantDataArrays. Also, + // support a vector of attribute values. + Result += " = "; - if (Vals.size() > 1) Result += '('; - for (ArrayRef::iterator I = Vals.begin(), E = Vals.end(); - I != E; ) { - ConstantDataArray *CDA = cast(*I++); - Result += '\"' + CDA->getAsString().str() + '"'; - if (I != E) Result += ' '; - } - if (Vals.size() > 1) Result += ')'; + Result += '\"' + cast(Vals)->getAsString().str() + '"'; + return Result; } @@ -237,13 +235,13 @@ bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { uint64_t AttributeImpl::getAlignment() const { assert(hasAttribute(Attribute::Alignment) && "Trying to retrieve the alignment from a non-alignment attr!"); - return cast(Vals[0])->getZExtValue(); + return cast(Values)->getZExtValue(); } uint64_t AttributeImpl::getStackAlignment() const { assert(hasAttribute(Attribute::StackAlignment) && "Trying to retrieve the stack alignment from a non-alignment attr!"); - return cast(Vals[0])->getZExtValue(); + return cast(Values)->getZExtValue(); } bool AttributeImpl::operator==(Attribute::AttrKind kind) const { -- cgit v1.1 From cdc3b74cfb8a1fb3d053106cd8843a39fbb5e2e5 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 1 Feb 2013 22:59:51 +0000 Subject: LLVM enablement for some older PowerPC CPUs git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.td | 20 ++++++++++++++++++++ lib/Target/PowerPC/PPCSubtarget.h | 5 +++++ 2 files changed, 25 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index d3bcbae..71d7d76 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E500mc", "">; def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E5500", "">; +def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; +def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; +def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; +def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; +def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", @@ -116,10 +121,25 @@ def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureSTFIWX, FeatureISEL, Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>; +def : Processor<"pwr3", G5Itineraries, + [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, + FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr4", G5Itineraries, + [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr5", G5Itineraries, + [DirectivePwr5, 
FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr5x", G5Itineraries, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"pwr6x", G5Itineraries, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 28f8587..15885bd 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -43,7 +43,12 @@ namespace PPC { DIR_A2, DIR_E500mc, DIR_E5500, + DIR_PWR3, + DIR_PWR4, + DIR_PWR5, + DIR_PWR5X, DIR_PWR6, + DIR_PWR6X, DIR_PWR7, DIR_64 }; -- cgit v1.1 From dbc86b98f2acd459ab3270cd8500afd32eba7b09 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 1 Feb 2013 23:10:09 +0000 Subject: Add notes about future PowerPC features git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174232 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.td | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 71d7d76..9929136 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -66,6 +66,23 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", "Enable QPX instructions">; +// Note: Future features to add when support is extended to more +// recent ISA levels: +// +// CMPB p6, p6x, p7 cmpb +// DFP p6, p6x, p7 decimal floating-point instructions +// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz +// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz +// FRE p5 through p7 fre (vs. fres, available since p3) +// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) +// LDBRX p7 load with byte reversal +// LFIWAX p6, p6x, p7 lfiwax +// LFIWZX p7 lfiwzx +// POPCNTB p5 through p7 popcntb and related instructions +// POPCNTD p7 popcntd and related instructions +// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates +// VSX p7 vector-scalar instruction set + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// -- cgit v1.1 From 945e828003b746f6bbe86390940cf1433d18b0a1 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 1 Feb 2013 23:54:37 +0000 Subject: [Dwarf] avoid emitting multiple AT_const_value for static members. Testing case is reduced from MultiSource/BenchMarks/Prolangs-C++/deriv1.
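The shape of C++ input that produces both a declaration and a definition DIE for the same constant looks roughly like this (a hypothetical reduction, not the actual deriv1 source):

    struct Shape {
      static const int NumSides = 4; // in-class declaration carries the value
    };
    const int Shape::NumSides;       // out-of-class definition of the member

Before the fix below, DW_AT_const_value could be attached once for the declaration and again when the definition reuses that DIE; the IsStaticMember flag in the patch suppresses the second emission.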
rdar://problem/13071590 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174235 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index f0655e6..dad1054 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1286,6 +1286,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // If this is a static data member definition, some attributes belong // to the declaration DIE. DIE *VariableDIE = NULL; + bool IsStaticMember = false; DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); if (SDMDecl.Verify()) { assert(SDMDecl.isStaticMember() && "Expected static member decl"); @@ -1295,6 +1296,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { getOrCreateContextDIE(SDMDecl.getContext()); VariableDIE = getDIE(SDMDecl); assert(VariableDIE && "Static member decl has no context?"); + IsStaticMember = true; } // If this is not a static data member definition, create the variable @@ -1348,9 +1350,13 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) - addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); - else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + // AT_const_value was added when the static member was created. To avoid + // emitting AT_const_value multiple times, we only add AT_const_value when + // it is not a static member. + if (!IsStaticMember) + addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); -- cgit v1.1 From 7650d9b893b83b6261d1bbc892464aa9d61cc23f Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Sat, 2 Feb 2013 00:02:03 +0000 Subject: Correct indentation for dumping LexicalScope. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174237 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LexicalScopes.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 3c01d91..8172154 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -314,24 +314,22 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { void LexicalScope::anchor() { } /// dump - Print data structures.
-void LexicalScope::dump() const { +void LexicalScope::dump(unsigned Indent) const { #ifndef NDEBUG raw_ostream &err = dbgs(); - err.indent(IndentLevel); + err.indent(Indent); err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; const MDNode *N = Desc; + err.indent(Indent); N->dump(); if (AbstractScope) - err << "Abstract Scope\n"; + err << std::string(Indent, ' ') << "Abstract Scope\n"; - IndentLevel += 2; if (!Children.empty()) - err << "Children ...\n"; + err << std::string(Indent + 2, ' ') << "Children ...\n"; for (unsigned i = 0, e = Children.size(); i != e; ++i) if (Children[i] != this) - Children[i]->dump(); - - IndentLevel -= 2; + Children[i]->dump(Indent + 2); #endif } -- cgit v1.1 From 98b93e5a94f69f6c7b12152a98455f501db1895d Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Sat, 2 Feb 2013 00:22:03 +0000 Subject: rdar://13126763 Fix a bug in DAGCombine. The symptom is mistakenly optimizing expression "x + x*x" into "x * 3.0". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174239 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a82410a..79ec227 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5832,13 +5832,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fadd x, x), x) -> (fmul 3.0, x) - if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && - N0.getOperand(0) == N1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N1, DAG.getConstantFP(3.0, VT)); - } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && @@ -5884,12 +5877,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0, NewCFP); } - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) - if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0, DAG.getConstantFP(3.0, VT)); - } // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && @@ -5914,6 +5901,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } + if (N0.getOpcode() == ISD::FADD) { + ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + } + + if (N1.getOpcode() == ISD::FADD) { + ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + } + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && -- cgit v1.1 From 214a5661d1c76468222d6f941e5dcb415f0f482a Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 2 Feb 2013 00:22:37 +0000 Subject: This patch makes "&Cls::purevfn" not an odr use. This isn't what the standard says, but that's a defect (to be filed). "Cls::purevfn()" is still an odr use. 
Also fixes a bug in the previous patch that caused us to not mark the function referenced just because we didn't want to mark it odr used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174240 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/DeadArgumentElimination.cpp | 68 +++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 49ef1e7..512f19d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -36,12 +36,14 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" #include #include using namespace llvm; -STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); -STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); +STATISTIC(NumArgumentsEliminated , "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); +STATISTIC(NumParametersEliminated, "Number of parameters replaced with undef"); STATISTIC(NumArgumentsReplacedWithUndef, "Number of unread args replaced with undef"); namespace { @@ -161,6 +163,7 @@ namespace { void MarkLive(const Function &F); void PropagateLiveness(const RetOrArg &RA); bool RemoveDeadStuffFromFunction(Function *F); + bool RemoveDeadParamsFromCallersOf(Function *F); bool DeleteDeadVarargs(Function &Fn); bool RemoveDeadArgumentsFromCallers(Function &Fn); }; @@ -249,8 +252,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { FunctionType *FTy = Fn.getFunctionType(); std::vector Params(FTy->param_begin(), FTy->param_end()); - FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), - Params, false); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... @@ -505,7 +507,9 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { // map. // // We consider arguments of non-internal functions to be intrinsically alive as -// well as arguments to functions which have their "address taken". +// well as arguments to functions which have their "address taken". Externally +// visible functions are assumed to only have their return values intrinsically +// alive, permitting removal of parameters to unused arguments in callers. // void DAE::SurveyFunction(const Function &F) { unsigned RetCount = NumRetVals(&F); @@ -528,7 +532,14 @@ void DAE::SurveyFunction(const Function &F) { return; } - if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) { + if (F.hasExternalLinkage() && !F.isDeclaration()) { + DEBUG(dbgs() << "DAE - Intrinsically live return from " << F.getName() + << "\n"); + // Mark the return values alive. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + MarkLive(CreateRet(&F, i)); + } else if (!F.hasLocalLinkage() && + (!ShouldHackArguments() || F.isIntrinsic())) { MarkLive(F); return; } @@ -1032,6 +1043,46 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { return true; } +// RemoveDeadParamsFromCallersOf - Replace any parameters that are never used +// by the callee with undef. +// +bool DAE::RemoveDeadParamsFromCallersOf(Function *F) { + // Don't modify fully live functions + if (LiveFunctions.count(F)) + return false; + + // Make a list of the dead arguments. 
+ SmallVector ArgDead; + unsigned i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++i) { + RetOrArg Arg = CreateArg(F, i); + if (!LiveValues.count(Arg)) + ArgDead.push_back(i); + } + if (ArgDead.empty()) + return false; + + bool MadeChange = false; + for (Function::use_iterator I = F->use_begin(), E = F->use_end(); + I != E; ++I) { + CallSite CS = CallSite(*I); + if (CS.getInstruction() && CS.isCallee(I)) { + for (unsigned i = 0, e = ArgDead.size(); i != e; ++i) { + Value *A = CS.getArgument(ArgDead[i]); + if (!isa(A)) { + ++NumParametersEliminated; + MadeChange = true; + CS.setArgument(ArgDead[i], UndefValue::get(A->getType())); + RecursivelyDeleteTriviallyDeadInstructions(A); + } + } + } + } + + return MadeChange; +} + bool DAE::runOnModule(Module &M) { bool Changed = false; @@ -1063,7 +1114,10 @@ bool DAE::runOnModule(Module &M) { // Increment now, because the function will probably get removed (ie. // replaced by a new one). Function *F = I++; - Changed |= RemoveDeadStuffFromFunction(F); + if (F->hasExternalLinkage() && !F->isDeclaration()) + Changed |= RemoveDeadParamsFromCallersOf(F); + else + Changed |= RemoveDeadStuffFromFunction(F); } // Finally, look for any unused parameters in functions with non-local -- cgit v1.1 From fb10b256aa018a1ef59a18f7c9634bd72a95e6e3 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 2 Feb 2013 00:25:26 +0000 Subject: Revert accidental commit (ran svn commit from wrong directory). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174241 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/DeadArgumentElimination.cpp | 68 +++----------------------- 1 file changed, 7 insertions(+), 61 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 512f19d..49ef1e7 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -36,14 +36,12 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" #include #include using namespace llvm; -STATISTIC(NumArgumentsEliminated , "Number of unread args removed"); -STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); -STATISTIC(NumParametersEliminated, "Number of parameters replaced with undef"); +STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); STATISTIC(NumArgumentsReplacedWithUndef, "Number of unread args replaced with undef"); namespace { @@ -163,7 +161,6 @@ namespace { void MarkLive(const Function &F); void PropagateLiveness(const RetOrArg &RA); bool RemoveDeadStuffFromFunction(Function *F); - bool RemoveDeadParamsFromCallersOf(Function *F); bool DeleteDeadVarargs(Function &Fn); bool RemoveDeadArgumentsFromCallers(Function &Fn); }; @@ -252,7 +249,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { FunctionType *FTy = Fn.getFunctionType(); std::vector Params(FTy->param_begin(), FTy->param_end()); - FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), + Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... @@ -507,9 +505,7 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { // map. 
// // We consider arguments of non-internal functions to be intrinsically alive as -// well as arguments to functions which have their "address taken". Externally -// visible functions are assumed to only have their return values intrinsically -// alive, permitting removal of parameters to unused arguments in callers. +// well as arguments to functions which have their "address taken". // void DAE::SurveyFunction(const Function &F) { unsigned RetCount = NumRetVals(&F); @@ -532,14 +528,7 @@ void DAE::SurveyFunction(const Function &F) { return; } - if (F.hasExternalLinkage() && !F.isDeclaration()) { - DEBUG(dbgs() << "DAE - Intrinsically live return from " << F.getName() - << "\n"); - // Mark the return values alive. - for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) - MarkLive(CreateRet(&F, i)); - } else if (!F.hasLocalLinkage() && - (!ShouldHackArguments() || F.isIntrinsic())) { + if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) { MarkLive(F); return; } @@ -1043,46 +1032,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { return true; } -// RemoveDeadParamsFromCallersOf - Replace any parameters that are never used -// by the callee with undef. -// -bool DAE::RemoveDeadParamsFromCallersOf(Function *F) { - // Don't modify fully live functions - if (LiveFunctions.count(F)) - return false; - - // Make a list of the dead arguments. - SmallVector ArgDead; - unsigned i = 0; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++i) { - RetOrArg Arg = CreateArg(F, i); - if (!LiveValues.count(Arg)) - ArgDead.push_back(i); - } - if (ArgDead.empty()) - return false; - - bool MadeChange = false; - for (Function::use_iterator I = F->use_begin(), E = F->use_end(); - I != E; ++I) { - CallSite CS = CallSite(*I); - if (CS.getInstruction() && CS.isCallee(I)) { - for (unsigned i = 0, e = ArgDead.size(); i != e; ++i) { - Value *A = CS.getArgument(ArgDead[i]); - if (!isa(A)) { - ++NumParametersEliminated; - MadeChange = true; - CS.setArgument(ArgDead[i], UndefValue::get(A->getType())); - RecursivelyDeleteTriviallyDeadInstructions(A); - } - } - } - } - - return MadeChange; -} - bool DAE::runOnModule(Module &M) { bool Changed = false; @@ -1114,10 +1063,7 @@ bool DAE::runOnModule(Module &M) { // Increment now, because the function will probably get removed (ie. // replaced by a new one). Function *F = I++; - if (F->hasExternalLinkage() && !F->isDeclaration()) - Changed |= RemoveDeadParamsFromCallersOf(F); - else - Changed |= RemoveDeadStuffFromFunction(F); + Changed |= RemoveDeadStuffFromFunction(F); } // Finally, look for any unused parameters in functions with non-local -- cgit v1.1 From bdcbccc710a0528b4abce947782fd502bafb848d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 2 Feb 2013 00:42:06 +0000 Subject: Use the AttributeSet's iterators. Use the AttributeSet's iterators in AttrBuilder::hasAttributes() when determining of the intersection of the AttrBuilder and AttributeSet is non-null. 
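The rewritten check in the diff below is a plain set-intersection test over attribute kinds. A simplified stand-in with ordinary containers (hypothetical types, not the LLVM classes):

    #include <set>
    #include <vector>

    // True iff some kind in SetKinds also appears in BuilderKinds, i.e. the
    // intersection of the two collections is non-empty.
    bool hasAttributes(const std::set<unsigned> &BuilderKinds,
                       const std::vector<unsigned> &SetKinds) {
      for (std::vector<unsigned>::const_iterator I = SetKinds.begin(),
             E = SetKinds.end(); I != E; ++I)
        if (BuilderKinds.count(*I))
          return true;
      return false;
    }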
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 1 - lib/IR/Attributes.cpp | 26 ++++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 66001f7..bf87562 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -153,7 +153,6 @@ public: /// \p Slot is an index into the AttrNodes list, not the index of the return / /// parameter/ function which the attributes apply to. AttributeSet getSlotAttributes(unsigned Slot) const { - // FIXME: This needs to use AttrNodes instead. return AttributeSet::get(Context, AttrNodes[Slot]); } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index f8ca9f1..d585843 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -42,9 +42,7 @@ Attribute Attribute::get(LLVMContext &Context, Constant *Kind, Constant *Val) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = (!Val) ? - new AttributeImpl(Context, Kind) : - new AttributeImpl(Context, Kind, Val); + PA = new AttributeImpl(Context, Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -884,7 +882,27 @@ bool AttrBuilder::hasAttributes() const { } bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { - return Raw() & A.Raw(Index); + unsigned Idx = ~0U; + for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) + if (A.getSlotIndex(I) == Index) { + Idx = I; + break; + } + + assert(Idx != ~0U && "Couldn't find the index!"); + + for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); + I != E; ++I) { + Attribute Attr = *I; + // FIXME: Support StringRefs. + Attribute::AttrKind Kind = Attribute::AttrKind( + cast(Attr.getAttributeKind())->getZExtValue()); + + if (Attrs.count(Kind)) + return true; + } + + return false; } bool AttrBuilder::hasAlignmentAttr() const { -- cgit v1.1 From fca0ed28c81a505b0b71605e8b59e4bb6daeda0e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 2 Feb 2013 00:52:44 +0000 Subject: Remove AttrBuilder::Raw(). 
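For context, the mask layout that Raw() produced, and that AttributeSetImpl::Raw() retains in the diff below, stores each alignment as a small log2 field rather than a single bit. A sketch assuming only the shift amounts visible in the patch (the patch itself uses llvm::Log2_32 for the log2 step):

    #include <cstdint>

    static unsigned log2u(unsigned V) { // V is assumed to be a power of two
      unsigned R = 0;
      while (V >>= 1)
        ++R;
      return R;
    }

    uint64_t encodeAlignments(uint64_t Mask, unsigned Align, unsigned StackAlign) {
      if (Align)      // alignment field: log2(align) + 1, shifted to bit 16
        Mask |= (uint64_t)(log2u(Align) + 1) << 16;
      if (StackAlign) // stack alignment field: shifted to bit 26
        Mask |= (uint64_t)(log2u(StackAlign) + 1) << 26;
      return Mask;
    }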
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d585843..3b4ece9 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -408,12 +408,23 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const { for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) { if (getSlotIndex(I) != Index) continue; const AttributeSetNode *ASN = AttrNodes[I].second; - AttrBuilder B; + uint64_t Mask = 0; for (AttributeSetNode::const_iterator II = ASN->begin(), - IE = ASN->end(); II != IE; ++II) - B.addAttribute(*II); - return B.Raw(); + IE = ASN->end(); II != IE; ++II) { + Attribute Attr = *II; + ConstantInt *Kind = cast(Attr.getAttributeKind()); + Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue()); + + if (KindVal == Attribute::Alignment) + Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16; + else if (KindVal == Attribute::StackAlignment) + Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26; + else + Mask |= AttributeImpl::getAttrMask(KindVal); + } + + return Mask; } return 0; @@ -895,10 +906,10 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { I != E; ++I) { Attribute Attr = *I; // FIXME: Support StringRefs. - Attribute::AttrKind Kind = Attribute::AttrKind( - cast(Attr.getAttributeKind())->getZExtValue()); + ConstantInt *Kind = cast(Attr.getAttributeKind()); + Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue()); - if (Attrs.count(Kind)) + if (Attrs.count(KindVal)) return true; } @@ -933,24 +944,6 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { return *this; } -uint64_t AttrBuilder::Raw() const { - uint64_t Mask = 0; - - for (DenseSet::const_iterator I = Attrs.begin(), - E = Attrs.end(); I != E; ++I) { - Attribute::AttrKind Kind = *I; - - if (Kind == Attribute::Alignment) - Mask |= (Log2_32(Alignment) + 1) << 16; - else if (Kind == Attribute::StackAlignment) - Mask |= (Log2_32(StackAlignment) + 1) << 26; - else - Mask |= AttributeImpl::getAttrMask(Kind); - } - - return Mask; -} - //===----------------------------------------------------------------------===// // AttributeFuncs Function Defintions //===----------------------------------------------------------------------===// -- cgit v1.1 From 63f331235513a6f562ab4b2809b497c518b132e9 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 2 Feb 2013 04:07:35 +0000 Subject: Start static relocation implementation for mips16. This checkin makes hello world work. 
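The hi/lo patterns added in the diff below rebuild a 32-bit symbol address from two 16-bit relocated halves. The arithmetic they model, sketched in plain C++ and ignoring the carry adjustment a signed low half would require:

    #include <cstdint>

    uint32_t materializeAddress(uint32_t SymAddr) {
      uint32_t Hi = (SymAddr >> 16) << 16; // li of the hi16 part, then sll by 16
      uint32_t Lo = SymAddr & 0xffffu;     // supplied by the lo16 relocation
      return Hi + Lo;                      // the (add $hi, MipsLo) pattern
    }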
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174264 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 27 ++++++++++++++++++++++++++- lib/Target/Mips/MipsInstrInfo.td | 4 ++++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 56553d9..b906f6b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -44,6 +44,15 @@ class FEXT_CCRXI16_ins _op, string asmstr, let isCodeGenOnly=1; } +// JAL and JALX instruction format +// +class FJAL16_ins _X, string asmstr, + InstrItinClass itin>: + FJAL16<_X, (outs), (ins simm20:$imm), + !strconcat(asmstr, "\t$imm\n\tnop"),[], + itin> { + let isCodeGenOnly=1; +} // // EXT-I instruction format // @@ -526,7 +535,19 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { let Defs = [HI, LO]; } +// +// Format: JAL target MIPS16e +// Purpose: Jump and Link +// To execute a procedure call within the current 256 MB-aligned +// region and preserve the current ISA. +// +def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> { + let isBranch = 1; + let hasDelaySlot = 0; // not true, but we add the nop for now + let isTerminator=1; + let isBarrier=1; +} // // Format: JR ra MIPS16e @@ -1070,12 +1091,14 @@ class UncondBranch16_pat: let Predicates = [RelocPIC, InMips16Mode]; } +def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), + (Jal16 tglobaladdr:$dst)>; + // Indirect branch def: Mips16Pat< (brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)>; - // Jump and Link (Call) let isCall=1, hasDelaySlot=0 in def JumpLinkReg16: @@ -1562,6 +1585,8 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), // hi/lo relocs +def : Mips16Pat<(MipsHi tglobaladdr:$in), + (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>; def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 162ffb0..ac99a13 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -232,6 +232,10 @@ def calltarget64: Operand; def simm16 : Operand { let DecoderMethod= "DecodeSimm16"; } + +def simm20 : Operand { +} + def simm16_64 : Operand; def shamt : Operand; -- cgit v1.1 From a8eefc7cc7c6e8be06d570fff76dc42b43a23392 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 2 Feb 2013 05:56:24 +0000 Subject: Remove the (apparently) unnecessary debug info metadata indirection. The main lists of debug info metadata attached to the compile_unit had an extra layer of metadata nodes they went through for no apparent reason. This patch removes that (& still passes just as much of the GDB 7.5 test suite). If anyone can show evidence as to why these extra metadata nodes are there I'm open to reverting this patch & documenting why they're there. 
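The removed hop is easiest to see against the DIBuilder code in the diff below; a shape sketch using the names from the patch (context elided, not a standalone compilable unit):

    Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
    MDNode *TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
    // Before: a one-element wrapper node sat between the compile unit and
    // the list it refers to:
    //   Value *THElts[] = { TempEnumTypes };
    //   MDNode *EnumHolder = MDNode::get(VMContext, THElts);
    // After: TempEnumTypes is stored as the compile unit operand directly,
    // and readers such as DICompileUnit::getEnumTypes() drop the matching
    // getOperand(0) unwrap.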
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174266 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 16 ++++------------ lib/IR/DebugInfo.cpp | 13 ++++--------- 2 files changed, 8 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 0d106ac..40655ec 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -84,20 +84,12 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, "Unable to create compile unit without filename"); Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; TempEnumTypes = MDNode::getTemporary(VMContext, TElts); - Value *THElts[] = { TempEnumTypes }; - MDNode *EnumHolder = MDNode::get(VMContext, THElts); TempRetainTypes = MDNode::getTemporary(VMContext, TElts); - Value *TRElts[] = { TempRetainTypes }; - MDNode *RetainHolder = MDNode::get(VMContext, TRElts); TempSubprograms = MDNode::getTemporary(VMContext, TElts); - Value *TSElts[] = { TempSubprograms }; - MDNode *SPHolder = MDNode::get(VMContext, TSElts); TempGVs = MDNode::getTemporary(VMContext, TElts); - Value *TVElts[] = { TempGVs }; - MDNode *GVHolder = MDNode::get(VMContext, TVElts); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), @@ -111,10 +103,10 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), - EnumHolder, - RetainHolder, - SPHolder, - GVHolder + TempEnumTypes, + TempRetainTypes, + TempSubprograms, + TempGVs }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index b431184..c983787 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -652,8 +652,7 @@ DIArray DICompileUnit::getEnumTypes() const { return DIArray(); if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(10))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); + return DIArray(N); return DIArray(); } @@ -662,8 +661,7 @@ DIArray DICompileUnit::getRetainedTypes() const { return DIArray(); if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(11))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); + return DIArray(N); return DIArray(); } @@ -672,9 +670,7 @@ DIArray DICompileUnit::getSubprograms() const { return DIArray(); if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(12))) - if (N->getNumOperands() > 0) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); + return DIArray(N); return DIArray(); } @@ -684,8 +680,7 @@ DIArray DICompileUnit::getGlobalVariables() const { return DIArray(); if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(13))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); + return DIArray(N); return DIArray(); } -- cgit v1.1 From d9d2f187759d0154574657c195068d367c338473 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Sun, 3 Feb 2013 10:48:31 +0000 Subject: [Support] Add LLVM_IS_UNALIGNED_ACCESS_FAST. 
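In the diff below the macro prunes template instantiations that only exist to guarantee alignment; one common use of such a flag is choosing between a direct load and a byte-assembled one. A generic sketch (little-endian order assumed, reader name hypothetical):

    #include <cstdint>
    #include <cstring>

    uint32_t read32le(const unsigned char *P) {
    #if LLVM_IS_UNALIGNED_ACCESS_FAST
      uint32_t V;
      std::memcpy(&V, P, sizeof(V)); // lowered to a single load on such hosts
      return V;
    #else
      // Assemble byte by byte so no misaligned access is ever issued.
      return (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
             ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
    #endif
    }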
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174271 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/ELFObjectFile.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 160053d..cfe0eb4 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -27,30 +27,42 @@ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { 1ULL << CountTrailingZeros_64(uintptr_t(Object->getBufferStart())); if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) +#if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 4) return new ELFObjectFile >(Object, ec); - else if (MaxAlignment >= 2) + else +#endif + if (MaxAlignment >= 2) return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) +#if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 4) return new ELFObjectFile >(Object, ec); - else if (MaxAlignment >= 2) + else +#endif + if (MaxAlignment >= 2) return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) +#if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 8) return new ELFObjectFile >(Object, ec); - else if (MaxAlignment >= 2) + else +#endif + if (MaxAlignment >= 2) return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { +#if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 8) return new ELFObjectFile >(Object, ec); - else if (MaxAlignment >= 2) + else +#endif + if (MaxAlignment >= 2) return new ELFObjectFile >(Object, ec); else llvm_unreachable("Invalid alignment for ELF file!"); -- cgit v1.1 From 0f76e648d800d7641b4e6e6decb90949cd680b03 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Sun, 3 Feb 2013 10:48:50 +0000 Subject: [Object][Archive] Improve performance. Improve performance of iterating over children and accessing the member file buffer by caching the file size and moving code out to the header. This also makes getBuffer return a StringRef instead of a MemoryBuffer. Both fixing a memory leak and removing a malloc. This takes getBuffer from ~10% of the time in lld to unmeasurable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174272 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 108 +++++-------------------------------------------- 1 file changed, 10 insertions(+), 98 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index e143338..0e13d05 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -14,7 +14,6 @@ #include "llvm/Object/Archive.h" #include "llvm/ADT/APInt.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" using namespace llvm; @@ -22,45 +21,6 @@ using namespace object; static const char *Magic = "!\n"; -namespace { -struct ArchiveMemberHeader { - char Name[16]; - char LastModified[12]; - char UID[6]; - char GID[6]; - char AccessMode[8]; - char Size[10]; ///< Size of data, not including header or padding. - char Terminator[2]; - - ///! Get the name without looking up long names. 
- StringRef getName() const { - char EndCond; - if (Name[0] == '/' || Name[0] == '#') - EndCond = ' '; - else - EndCond = '/'; - StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond); - if (end == StringRef::npos) - end = sizeof(Name); - assert(end <= sizeof(Name) && end > 0); - // Don't include the EndCond if there is one. - return StringRef(Name, end); - } - - uint64_t getSize() const { - uint64_t ret; - if (StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, ret)) - llvm_unreachable("Size is not an integer."); - return ret; - } -}; -} - -static const ArchiveMemberHeader *ToHeader(const char *base) { - return reinterpret_cast(base); -} - - static bool isInternalMember(const ArchiveMemberHeader &amh) { static const char *const internals[] = { "/", @@ -78,25 +38,6 @@ static bool isInternalMember(const ArchiveMemberHeader &amh) { void Archive::anchor() { } -Archive::Child Archive::Child::getNext() const { - size_t SpaceToSkip = sizeof(ArchiveMemberHeader) + - ToHeader(Data.data())->getSize(); - // If it's odd, add 1 to make it even. - if (SpaceToSkip & 1) - ++SpaceToSkip; - - const char *NextLoc = Data.data() + SpaceToSkip; - - // Check to see if this is past the end of the archive. - if (NextLoc >= Parent->Data->getBufferEnd()) - return Child(Parent, StringRef(0, 0)); - - size_t NextSize = sizeof(ArchiveMemberHeader) + - ToHeader(NextLoc)->getSize(); - - return Child(Parent, StringRef(NextLoc, NextSize)); -} - error_code Archive::Child::getName(StringRef &Result) const { StringRef name = ToHeader(Data.data())->getName(); // Check if it's a special name. @@ -149,39 +90,12 @@ error_code Archive::Child::getName(StringRef &Result) const { return object_error::success; } -uint64_t Archive::Child::getSize() const { - uint64_t size = ToHeader(Data.data())->getSize(); - // Don't include attached name. 
- StringRef name = ToHeader(Data.data())->getName(); - if (name.startswith("#1/")) { - uint64_t name_size; - if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) - llvm_unreachable("Long name length is not an integer"); - size -= name_size; - } - return size; -} - -MemoryBuffer *Archive::Child::getBuffer() const { - StringRef name = ToHeader(Data.data())->getName(); - int size = sizeof(ArchiveMemberHeader); - if (name.startswith("#1/")) { - uint64_t name_size; - if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) - llvm_unreachable("Long name length is not an integer"); - size += name_size; - } - if (getName(name)) - return 0; - return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()), - name, - false); -} - error_code Archive::Child::getAsBinary(OwningPtr &Result) const { OwningPtr ret; - if (error_code ec = - createBinary(getBuffer(), ret)) + OwningPtr Buff; + if (error_code ec = getMemoryBuffer(Buff)) + return ec; + if (error_code ec = createBinary(Buff.take(), ret)) return ec; Result.swap(ret); return object_error::success; @@ -270,13 +184,12 @@ Archive::child_iterator Archive::end_children() const { } error_code Archive::Symbol::getName(StringRef &Result) const { - Result = - StringRef(Parent->SymbolTable->getBuffer()->getBufferStart() + StringIndex); + Result = StringRef(Parent->SymbolTable->getBuffer().begin() + StringIndex); return object_error::success; } error_code Archive::Symbol::getMember(child_iterator &Result) const { - const char *Buf = Parent->SymbolTable->getBuffer()->getBufferStart(); + const char *Buf = Parent->SymbolTable->getBuffer().begin(); const char *Offsets = Buf + 4; uint32_t Offset = 0; if (Parent->kind() == K_GNU) { @@ -326,13 +239,13 @@ Archive::Symbol Archive::Symbol::getNext() const { Symbol t(*this); // Go to one past next null. t.StringIndex = - Parent->SymbolTable->getBuffer()->getBuffer().find('\0', t.StringIndex) + 1; + Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; ++t.SymbolIndex; return t; } Archive::symbol_iterator Archive::begin_symbols() const { - const char *buf = SymbolTable->getBuffer()->getBufferStart(); + const char *buf = SymbolTable->getBuffer().begin(); if (kind() == K_GNU) { uint32_t symbol_count = 0; symbol_count = *reinterpret_cast(buf); @@ -347,13 +260,12 @@ Archive::symbol_iterator Archive::begin_symbols() const { symbol_count = *reinterpret_cast(buf); buf += 4 + (symbol_count * 2); // Skip indices. 
} - uint32_t string_start_offset = - buf - SymbolTable->getBuffer()->getBufferStart(); + uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); return symbol_iterator(Symbol(this, 0, string_start_offset)); } Archive::symbol_iterator Archive::end_symbols() const { - const char *buf = SymbolTable->getBuffer()->getBufferStart(); + const char *buf = SymbolTable->getBuffer().begin(); uint32_t symbol_count = 0; if (kind() == K_GNU) { symbol_count = *reinterpret_cast(buf); -- cgit v1.1 From cc00cc7e0427ab96a049703dd7b941eccdd4e7bd Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Sun, 3 Feb 2013 13:17:11 +0000 Subject: use GEP::accumulateConstantOffset() to replace custom written code to compute GEP offset git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174279 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 4b4fa5c..400ce72 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -235,38 +235,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (CE->getOpcode() == Instruction::GetElementPtr) { - // Cannot compute this if the element type of the pointer is missing size - // info. - if (!cast(CE->getOperand(0)->getType()) - ->getElementType()->isSized()) - return false; - + if (GEPOperator *GEP = dyn_cast(CE)) { // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; // Otherwise, add any offset that our operands provide. - gep_type_iterator GTI = gep_type_begin(CE); - for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); - i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast(*i); - if (!CI) return false; // Index isn't a simple constant? - if (CI->isZero()) continue; // Not adding anything. - - if (StructType *ST = dyn_cast(*GTI)) { - // N = N + Offset - Offset += - APInt(Offset.getBitWidth(), - TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue())); - } else { - SequentialType *SQT = cast(*GTI); - Offset += APInt(Offset.getBitWidth(), - TD.getTypeAllocSize(SQT->getElementType()) * - CI->getSExtValue()); - } - } - return true; + return GEP->accumulateConstantOffset(TD, Offset); } return false; -- cgit v1.1 From af2f494c2d87a77c46efb85856f235ddd7f3c6ab Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sun, 3 Feb 2013 21:54:38 +0000 Subject: Added instance variable/initializers/getter/setters for new keyword externally initialized to GlobalVariable. No *TRUE* functionality change. I am going to add in the actual test cases with the actual functionality changes in a later patch because I want to include some test cases. To be clear when I say no *TRUE* functionality change I mean that this patch (like it says in the title) only contains getters/setters and sets up a default initial value of the instance variable to false so that this patch does not affect any other uses of Global Variable.h. 
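With the new parameter in place, constructing such a global would look roughly like this (a sketch against the constructor signature in the diff that follows; M, Ty and InitVal are assumed to exist):

    GlobalVariable *GV =
        new GlobalVariable(M, Ty, /*constant=*/false,
                           GlobalValue::ExternalLinkage, InitVal, "g",
                           /*Before=*/0, GlobalVariable::NotThreadLocal,
                           /*AddressSpace=*/0,
                           /*isExternallyInitialized=*/true);

Per the commit message the flag is only stored for now; the optimizations that must treat InitVal as a placeholder land in a later patch.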
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174295 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Globals.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 2e52aa3..6d547f3 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -82,13 +82,16 @@ bool GlobalValue::isDeclaration() const { //===----------------------------------------------------------------------===// GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, - Constant *InitVal, const Twine &Name, - ThreadLocalMode TLMode, unsigned AddressSpace) + Constant *InitVal, + const Twine &Name, ThreadLocalMode TLMode, + unsigned AddressSpace, + bool isExternallyInitialized) : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits::op_begin(this), InitVal != 0, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode) { + isConstantGlobal(constant), threadLocalMode(TLMode), + isExternallyInitializedConstant(isExternallyInitialized) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -102,12 +105,14 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, GlobalVariable *Before, ThreadLocalMode TLMode, - unsigned AddressSpace) + unsigned AddressSpace, + bool isExternallyInitialized) : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits::op_begin(this), InitVal != 0, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode) { + isConstantGlobal(constant), threadLocalMode(TLMode), + isExternallyInitializedConstant(isExternallyInitialized) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); -- cgit v1.1 From b516e9b64850e0e1a50f680fd1443ac12e56557b Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 4 Feb 2013 00:47:33 +0000 Subject: PPCDarwinAsmPrinter::EmitStartOfAsmFile(): Add possible elements in CPUDirectives[]. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174297 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 839f918..b2d9ab2 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -821,7 +821,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppcA2", "ppce500mc", "ppce5500", + "power3", + "power4", + "power5", + "power5x", "power6", + "power6x", "power7", "ppc64" }; -- cgit v1.1 From 87b1a453f08fd0d56a074d2d665f779232a6cac0 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 4 Feb 2013 00:47:38 +0000 Subject: PPCDarwinAsmPrinter::EmitStartOfAsmFile(): Add checking range in CPUDirectives[]. 
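The range check added below is the usual guard that keeps a string table and its enum in sync; in isolation it looks like this (hypothetical table, the real one is CPUDirectives[]):

    #include <cassert>

    static const char *Names[] = { "power6", "power6x", "power7" };

    const char *directiveName(unsigned Directive) {
      // Fires when the enum grows without the table being updated.
      assert(Directive < sizeof(Names) / sizeof(*Names) &&
             "Names[] might not be up-to-date!");
      return Names[Directive];
    }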
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174298 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index b2d9ab2..a440667 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -841,8 +841,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { assert(Directive <= PPC::DIR_64 && "Directive out of range."); // FIXME: This is a total hack, finish mc'izing the PPC backend. - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { + assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) && + "CPUDirectives[] might not be up-to-date!"); OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); + } // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. -- cgit v1.1 From a7ad84851b018602487779d97195bad0536f9a7a Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Mon, 4 Feb 2013 02:52:05 +0000 Subject: ARM cost model: Penalize insertelement into D subregisters Swift has a renaming dependency if we load into D subregisters. We don't have a way of distinguishing between insertelement operations of values from loads and other values. Therefore, we are pessimistic for now (The performance problem showed up in example 14 of gcc-loops). radar://13096933 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174300 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 61cb1f6..2ded63f 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -117,6 +117,7 @@ public: unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; /// @} }; @@ -197,3 +198,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } + +unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) const { + // Penalize inserting into an D-subregister. 
+ if (ST->isSwift() && + Opcode == Instruction::InsertElement && + ValTy->isVectorTy() && + ValTy->getScalarSizeInBits() <= 32) + return 2; + + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); +} -- cgit v1.1 From a9b1317443990533f56a8420c0fbfb8868dad566 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 4 Feb 2013 05:31:37 +0000 Subject: PR15149: crash when printing debug info metadata containing an invalid language spec git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174304 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DebugInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index c983787..ecaab1f 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -1058,8 +1058,8 @@ void DIScope::printInternal(raw_ostream &OS) const { void DICompileUnit::printInternal(raw_ostream &OS) const { DIScope::printInternal(OS); - if (unsigned Lang = getLanguage()) - OS << " [" << dwarf::LanguageString(Lang) << ']'; + if (const char *Lang = dwarf::LanguageString(getLanguage())) + OS << " [" << Lang << ']'; } void DIEnumerator::printInternal(raw_ostream &OS) const { -- cgit v1.1 From f839eed5ea2bfd395256d60e15d1e31dbad6ca8b Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 4 Feb 2013 05:56:36 +0000 Subject: [DebugInfo] remove more node indirection (this time from the subprogram's variable lists) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174305 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 12 ++---------- lib/IR/DebugInfo.cpp | 7 ++----- 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 40655ec..cc397cd 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -900,10 +900,6 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, MDNode *TParams, MDNode *Decl) { Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Temp = MDNode::getTemporary(VMContext, TElts); - Value *TVElts[] = { Temp }; - MDNode *THolder = MDNode::get(VMContext, TVElts); - Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -924,7 +920,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, Fn, TParams, Decl, - THolder, + MDNode::getTemporary(VMContext, TElts), ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine) }; MDNode *Node = MDNode::get(VMContext, Elts); @@ -949,10 +945,6 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Function *Fn, MDNode *TParam) { Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Temp = MDNode::getTemporary(VMContext, TElts); - Value *TVElts[] = { Temp }; - MDNode *THolder = MDNode::get(VMContext, TVElts); - Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -973,7 +965,7 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Fn, TParam, Constant::getNullValue(Type::getInt32Ty(VMContext)), - THolder, + MDNode::getTemporary(VMContext, TElts), // FIXME: Do we want to use different scope/lines? 
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index ecaab1f..a59fdcd 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -593,17 +593,14 @@ unsigned DISubprogram::isOptimized() const { MDNode *DISubprogram::getVariablesNodes() const { if (!DbgNode || DbgNode->getNumOperands() <= 19) return NULL; - if (MDNode *Temp = dyn_cast_or_null(DbgNode->getOperand(19))) - return dyn_cast_or_null(Temp->getOperand(0)); - return NULL; + return dyn_cast_or_null(DbgNode->getOperand(19)); } DIArray DISubprogram::getVariables() const { if (!DbgNode || DbgNode->getNumOperands() <= 19) return DIArray(); if (MDNode *T = dyn_cast_or_null(DbgNode->getOperand(19))) - if (MDNode *A = dyn_cast_or_null(T->getOperand(0))) - return DIArray(A); + return DIArray(T); return DIArray(); } -- cgit v1.1 From 9c02a276049cbd1d1511a88ebc7a22bb33658237 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 4 Feb 2013 07:03:24 +0000 Subject: More MSan/ASan annotations. This change lets us bootstrap LLVM/Clang under ASan and MSan. It contains fixes for 2 issues: - X86JIT reads return address from stack, which MSan does not know is initialized. - bugpoint tests run binaries with RLIMIT_AS. This does not work with certain Sanitizers. We are no longer including config.h in Compiler.h with this change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174306 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Allocator.cpp | 4 ++-- lib/Support/Unix/Program.inc | 5 +++++ lib/Target/X86/X86JITInfo.cpp | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index b4fdc1e..e269cf9 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -104,8 +104,8 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { if (Ptr + Size <= End) { CurPtr = Ptr + Size; // Update the allocation point of this memory block in MemorySanitizer. - // Without this, MemorySanitizer reports for values originating from it will - // point to the allocation point of the entire slab. + // Without this, MemorySanitizer messages for values originated from here + // will point to the allocation of the entire slab. __msan_allocated_memory(Ptr, Size); return Ptr; } diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index c384316..117151c 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "Unix.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/FileSystem.h" #include #if HAVE_SYS_STAT_H @@ -164,12 +165,16 @@ static void SetMemoryLimits (unsigned size) setrlimit (RLIMIT_RSS, &r); #endif #ifdef RLIMIT_AS // e.g. NetBSD doesn't have it. + // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb + // of virtual memory for shadow memory mapping. +#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD // Virtual memory. 
getrlimit (RLIMIT_AS, &r); r.rlim_cur = limit; setrlimit (RLIMIT_AS, &r); #endif #endif +#endif } bool diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index cca391f..0a4acce 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -351,6 +351,9 @@ static void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; + // We are reading raw stack data here. Tell MemorySanitizer that it is + // sufficiently initialized. + __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc)); assert(*RetAddrLoc == RetAddr && "Could not find return address on the stack!"); -- cgit v1.1 From 17174e59004d4df12f9037f0dc99ae530d71eab9 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 4 Feb 2013 14:14:58 +0000 Subject: Give explicit suffix to integer constant over 32-bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174324 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 1c09369..a206fd1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -552,7 +552,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { // 2^32. assert((int64_t)Value >= -(1LL << 32) && (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000) >> 12); + return ADRImmBits((Value & 0x1fffff000ULL) >> 12); case AArch64::fixup_a64_ld64_gottprel_lo12_nc: // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 -- cgit v1.1 From 4969310052f45b1e2e5d21735e38641a20be0e21 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Feb 2013 15:19:18 +0000 Subject: SelectionDAG: Teach FoldConstantArithmetic how to deal with vectors. This required disabling a PowerPC optimization that did the following: input: x = BUILD_VECTOR lowered to: tmp = BUILD_VECTOR x = ADD tmp, tmp The add now gets folded immediately and we're back at the BUILD_VECTOR we started from. I don't see a way to fix this currently so I left it disabled for now. Fix some trivially foldable X86 tests too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174325 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 159 +++++++++++++++++++++--------- lib/Target/PowerPC/PPCISelLowering.cpp | 5 + 2 files changed, 120 insertions(+), 44 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5724122..09885d8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2680,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2) { - const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + SmallVector, 4> Inputs; + SmallVector Outputs; + EVT SVT = VT.getScalarType(); + + ConstantSDNode *Scalar1 = dyn_cast(Cst1); + ConstantSDNode *Scalar2 = dyn_cast(Cst2); + if (Scalar1 && Scalar2) { + // Scalar instruction. 
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2)); + } else { + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast(Cst1); + BuildVectorSDNode *BV2 = dyn_cast(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + Inputs.push_back(std::make_pair(V1, V2)); + } + } - switch (Opcode) { - case ISD::ADD: return getConstant(C1 + C2, VT); - case ISD::SUB: return getConstant(C1 - C2, VT); - case ISD::MUL: return getConstant(C1 * C2, VT); - case ISD::UDIV: - if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); - break; - case ISD::UREM: - if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); - break; - case ISD::SDIV: - if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); - break; - case ISD::SREM: - if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); - break; - case ISD::AND: return getConstant(C1 & C2, VT); - case ISD::OR: return getConstant(C1 | C2, VT); - case ISD::XOR: return getConstant(C1 ^ C2, VT); - case ISD::SHL: return getConstant(C1 << C2, VT); - case ISD::SRL: return getConstant(C1.lshr(C2), VT); - case ISD::SRA: return getConstant(C1.ashr(C2), VT); - case ISD::ROTL: return getConstant(C1.rotl(C2), VT); - case ISD::ROTR: return getConstant(C1.rotr(C2), VT); - default: break; + // We have a number of constant values, constant fold them element by element. + for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { + const APInt &C1 = Inputs[I].first->getAPIntValue(); + const APInt &C2 = Inputs[I].second->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: + Outputs.push_back(getConstant(C1 + C2, SVT)); + break; + case ISD::SUB: + Outputs.push_back(getConstant(C1 - C2, SVT)); + break; + case ISD::MUL: + Outputs.push_back(getConstant(C1 * C2, SVT)); + break; + case ISD::UDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.udiv(C2), SVT)); + break; + case ISD::UREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.urem(C2), SVT)); + break; + case ISD::SDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); + break; + case ISD::SREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.srem(C2), SVT)); + break; + case ISD::AND: + Outputs.push_back(getConstant(C1 & C2, SVT)); + break; + case ISD::OR: + Outputs.push_back(getConstant(C1 | C2, SVT)); + break; + case ISD::XOR: + Outputs.push_back(getConstant(C1 ^ C2, SVT)); + break; + case ISD::SHL: + Outputs.push_back(getConstant(C1 << C2, SVT)); + break; + case ISD::SRL: + Outputs.push_back(getConstant(C1.lshr(C2), SVT)); + break; + case ISD::SRA: + Outputs.push_back(getConstant(C1.ashr(C2), SVT)); + break; + case ISD::ROTL: + Outputs.push_back(getConstant(C1.rotl(C2), SVT)); + break; + case ISD::ROTR: + Outputs.push_back(getConstant(C1.rotr(C2), SVT)); + break; + default: + return SDValue(); + } } - return SDValue(); + // Handle the scalar case first. 
+ if (Outputs.size() == 1) + return Outputs.back(); + + // Otherwise build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, - SDValue N1, SDValue N2) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, + SDValue N2) { ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); switch (Opcode) { @@ -3013,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } } - if (N1C) { - if (N2C) { - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); - if (SV.getNode()) return SV; - } else { // Cannonicalize constant to RHS if commutative - if (isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - } + // Perform trivial constant folding. + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); + if (SV.getNode()) return SV; + + // Canonicalize constant to RHS if commutative. + if (N1C && !N2C && isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); } // Constant fold FP operations. @@ -3030,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast(N2.getNode()); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Cannonicalize constant to RHS if commutative + // Canonicalize constant to RHS if commutative. std::swap(N1CFP, N2CFP); std::swap(N1, N2); } else if (N2CFP) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9a68927..4cd0fc4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5032,9 +5032,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If this value is in the range [-32,30] and is even, use: // tmp = VSPLTI[bhw], result = add tmp, tmp if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { + // FIXME: This is currently disabled because the ADD will be folded back + // into an invalid BUILD_VECTOR immediately. + return SDValue(); +#if 0 SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); +#endif } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is -- cgit v1.1 From 9fa9251bbad191feb52273b0f3d44531a5fd78a0 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Feb 2013 15:19:25 +0000 Subject: X86: Simplify code. No functionality change. 
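The FoldConstantArithmetic rework above (r174325) is the interesting piece here: instead of folding only a pair of scalar ConstantSDNodes, it gathers the lanes of two constant BUILD_VECTORs, folds each lane with the scalar rules, and rebuilds the vector. Below is a minimal standalone sketch of that shape, with plain int64_t standing in for APInt/ConstantSDNode; the helper names are illustrative only, not LLVM API.

#include <cstdint>
#include <vector>

// Fold one scalar lane; returns false for lanes that must not fold
// (e.g. division by zero), mirroring the early "return SDValue()" exits.
// Overflow corner cases are ignored for brevity.
static bool foldScalar(char Op, int64_t A, int64_t B, int64_t &Res) {
  switch (Op) {
  case '+': Res = A + B; return true;
  case '*': Res = A * B; return true;
  case '/': if (B == 0) return false; Res = A / B; return true;
  default:  return false;
  }
}

// Fold two constant vectors lane by lane and rebuild the result vector.
static bool foldVectors(char Op, const std::vector<int64_t> &V1,
                        const std::vector<int64_t> &V2,
                        std::vector<int64_t> &Out) {
  if (V1.size() != V2.size())
    return false;                   // Operand vectors must be in sync.
  for (size_t I = 0, E = V1.size(); I != E; ++I) {
    int64_t R;
    if (!foldScalar(Op, V1[I], V2[I], R))
      return false;                 // One bad lane aborts the whole fold.
    Out.push_back(R);
  }
  return true;
}

The design point worth noting is the all-or-nothing behaviour: a single lane that cannot fold (an undefined division, a mismatched element type) abandons the entire vector fold rather than producing a partial result.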
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174326 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 04237e7..5b3f066 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11467,7 +11467,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - LLVMContext *Context = DAG.getContext(); if (!Subtarget->hasSSE2()) return SDValue(); @@ -11587,14 +11586,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1), DAG.getConstant(23, MVT::i32)); - const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U}; - Constant *C = ConstantDataVector::get(*Context, CV); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - - Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); + Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); @@ -15369,13 +15361,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) { APInt A = cast(OpRHS.getOperand(0))->getAPIntValue(); - if (CondRHS.getConstantOperandVal(0) == -A-1) { - SmallVector V(VT.getVectorNumElements(), - DAG.getConstant(-A, VT.getScalarType())); + if (CondRHS.getConstantOperandVal(0) == -A-1) return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - V.data(), V.size())); - } + DAG.getConstant(-A, VT)); } // Another special case: If C was a sign bit, the sub has been @@ -16582,10 +16570,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Build the arithmetic shift. unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - MemVT.getVectorElementType().getSizeInBits(); - SmallVector C(NumElems, - DAG.getConstant(Amt, RegVT.getScalarType())); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size()); - Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV); + Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, + DAG.getConstant(Amt, RegVT)); return DCI.CombineTo(N, Shuff, TF, true); } -- cgit v1.1 From a220aeb58f30a6b3fc991883e8107d5e1198c53b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Feb 2013 15:19:33 +0000 Subject: X86: Open up some opportunities for constant folding by postponing shift lowering. Fixes PR15141. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174327 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5b3f066..4962023 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11583,8 +11583,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // Lower SHL with variable shift amount. 
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { - Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1), - DAG.getConstant(23, MVT::i32)); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); @@ -11595,8 +11594,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq."); // a = a << 5; - Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1), - DAG.getConstant(5, MVT::i32)); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT)); Op = DAG.getNode(ISD::BITCAST, dl, VT, Op); // Turn 'a' into a mask suitable for VSELECT -- cgit v1.1 From 42a68443a15d74ada014ad2c8a2349fe134a44f3 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 4 Feb 2013 15:44:38 +0000 Subject: Fix some abuses of StringRef We were taking a StringRef to a temporary result, which can go horribly wrong. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174328 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 3402634..49bec59 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1207,8 +1207,8 @@ AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { return MatchOperand_ParseFail; } - StringRef lowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch(lowerCase) + std::string LowerCase = Parser.getTok().getIdentifier().lower(); + RefKind = StringSwitch(LowerCase) .Case("got", AArch64MCExpr::VK_AARCH64_GOT) .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) @@ -1343,7 +1343,8 @@ AArch64AsmParser::ParseCRxOperand( return MatchOperand_ParseFail; } - StringRef Tok = Parser.getTok().getIdentifier().lower(); + std::string LowerTok = Parser.getTok().getIdentifier().lower(); + StringRef Tok(LowerTok); if (Tok[0] != 'c') { Error(S, "Expected cN operand where 0 <= N <= 15"); return MatchOperand_ParseFail; -- cgit v1.1 From 0d3731478e6242cceae7c006a071b8b17f7fd298 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Feb 2013 15:52:32 +0000 Subject: Disable a couple more vector splat optimizations on PPC. I didn't see those because the test case used "not grep". FileCheck the test and XFAIL it, preserving the old optimization, so this can be fixed eventually. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174330 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 4cd0fc4..5631c93 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5138,14 +5138,16 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // Three instruction sequences. // Odd, in range [17,31]: (vsplti C)-(vsplti -16). - if (SextVal >= 0 && SextVal <= 31) { + // FIXME: Disabled because the add gets constant folded. 
+ if (0 && SextVal >= 0 && SextVal <= 31) { SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); } // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). - if (SextVal >= -31 && SextVal <= 0) { + // FIXME: Disabled because the add gets constant folded. + if (0 && SextVal >= -31 && SextVal <= 0) { SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); -- cgit v1.1 From 3e1635d08ce786d32dd53f772ccc6a8b4f720ff3 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Mon, 4 Feb 2013 15:52:56 +0000 Subject: Hexagon: Add V4 combine instructions and some more Def Pats for V2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.h | 2 + lib/Target/Hexagon/HexagonInstrInfo.td | 61 ++++++++++++++++++++---- lib/Target/Hexagon/HexagonInstrInfoV4.td | 80 ++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 5a415eb..65dab85 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -52,6 +52,8 @@ namespace llvm { WrapperCP, WrapperCombineII, WrapperCombineRR, + WrapperCombineRI_V4, + WrapperCombineIR_V4, WrapperPackhl, WrapperSplatB, WrapperSplatH, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 11c0167..6caab26 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -2825,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))), // i1 -> i64 def : Pat <(i64 (zext (i1 PredRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>; + (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, + Requires<[NoV4T]>; // i32 -> i64 def : Pat <(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>, + Requires<[NoV4T]>; // i8 -> i64 def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[NoV4T]>; // i16 -> i64 def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[NoV4T]>; // i32 -> i64 def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, + Requires<[NoV4T]>; def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), (i32 (LDriw ADDRriS11_0:$src1))>; @@ -2862,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 
PredRegs:$src1))), // Any extended 64-bit load. // anyext i32 -> i64 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, + Requires<[NoV4T]>; + +// When there is an offset we should prefer the pattern below over the pattern above. +// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc) +// So this complexity below is comfortably higher to allow for choosing the below. +// If this is not done then we generate addresses such as +// ******************************************** +// r1 = add (r0, #4) +// r1 = memw(r1 + #0) +// instead of +// r1 = memw(r0 + #4) +// ******************************************** +let AddedComplexity = 100 in +def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[NoV4T]>; // anyext i16 -> i64. def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[NoV4T]>; // Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). def : Pat<(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>, + Requires<[NoV4T]>; // Multiply 64-bit unsigned and use upper result. def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 617eef4..08225e4 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -280,6 +280,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst), []>, Requires<[HasV4T]>; +def HexagonWrapperCombineRI_V4 : + SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; +def HexagonWrapperCombineIR_V4 : + SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; + +def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), + (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>, + Requires<[HasV4T]>; + +def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), + (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>, + Requires<[HasV4T]>; + let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), @@ -1143,6 +1156,73 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), u16ImmPred:$offset))), (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, Requires<[HasV4T]>; +// zext i1->i64 +def : Pat <(i64 (zext (i1 PredRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def : Pat <(i64 (zext (i32 IntRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>, + Requires<[HasV4T]>; +// zext i8->i64 +def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[HasV4T]>; 
+ +// zext i16->i64 +def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i16->i64 +def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i32->i64 +def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; + //===----------------------------------------------------------------------===// -- cgit v1.1 From f9271ea159b97e2febedcf095c3c4122cb24d077 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 4 Feb 2013 23:32:23 +0000 Subject: Initial cleanups of the param-attribute code in the bitcode reader/writer. Rename the PARAMATTR_CODE_ENTRY to PARAMATTR_CODE_ENTRY_OLD. It will be replaced by another encoding. Keep around the current LLVM attribute encoder/decoder code, but move it to the bitcode directories so that no one's tempted to use them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174335 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 26 +++++++++++++++++++--- lib/Bitcode/Writer/BitcodeWriter.cpp | 30 ++++++++++++++++++++++---- lib/IR/Attributes.cpp | 42 +----------------------------------- 3 files changed, 50 insertions(+), 48 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index ca299c0..6ab7011 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -428,6 +428,26 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { // Functions for parsing blocks from the bitcode file //===----------------------------------------------------------------------===// + +/// \brief This fills an AttrBuilder object with the LLVM attributes that have +/// been decoded from the given integer. This function must stay in sync with +/// 'encodeLLVMAttributesForBitcode'. +static void decodeLLVMAttributesForBitcode(AttrBuilder &B, + uint64_t EncodedAttrs) { + // FIXME: Remove in 4.0. + + // The alignment is stored as a 16-bit raw value from bits 31--16. We shift + // the bits above 31 down by 11 bits. 
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; + assert((!Alignment || isPowerOf2_32(Alignment)) && + "Alignment must be a power of two."); + + if (Alignment) + B.addAlignmentAttr(Alignment); + B.addRawValue(((EncodedAttrs & (0xffffULL << 32)) >> 11) | + (EncodedAttrs & 0xffff)); +} + bool BitcodeReader::ParseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) return Error("Malformed block record"); @@ -459,14 +479,14 @@ bool BitcodeReader::ParseAttributeBlock() { switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; - case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...] + case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...] + // FIXME: Remove in 4.0. if (Record.size() & 1) return Error("Invalid ENTRY record"); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B; - AttributeFuncs::decodeLLVMAttributesForBitcode(Context, B, - Record[i+1]); + decodeLLVMAttributesForBitcode(B, Record[i+1]); Attrs.push_back(AttributeSet::get(Context, Record[i], B)); } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index b6c2bc0..c6d0371 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -161,7 +161,30 @@ static void WriteStringRecord(unsigned Code, StringRef Str, Stream.EmitRecord(Code, Vals, AbbrevToUse); } -// Emit information about parameter attributes. +/// \brief This returns an integer containing an encoding of all the LLVM +/// attributes found in the given attribute bitset. Any change to this encoding +/// is a breaking change to bitcode compatibility. +/// N.B. This should be used only by the bitcode writer! +static uint64_t encodeLLVMAttributesForBitcode(AttributeSet Attrs, + unsigned Index) { + // FIXME: Remove in 4.0! + + // FIXME: It doesn't make sense to store the alignment information as an + // expanded out value, we should store it as a log2 value. However, we can't + // just change that here without breaking bitcode compatibility. If this ever + // becomes a problem in practice, we should introduce new tag numbers in the + // bitcode file and have those tags use a more efficiently encoded alignment + // field. + + // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit + // log2 encoded value. Shift the bits above the alignment up by 11 bits. 
+ uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; + if (Attrs.hasAttribute(Index, Attribute::Alignment)) + EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; + EncodedAttrs |= (Attrs.Raw(Index) & (0xffffULL << 21)) << 11; + return EncodedAttrs; +} + static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &Attrs = VE.getAttributes(); @@ -175,12 +198,11 @@ static void WriteAttributeTable(const ValueEnumerator &VE, for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { unsigned Index = A.getSlotIndex(i); Record.push_back(Index); - Record.push_back(AttributeFuncs:: - encodeLLVMAttributesForBitcode(A.getSlotAttributes(i), + Record.push_back(encodeLLVMAttributesForBitcode(A.getSlotAttributes(i), Index)); } - Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); + Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY_OLD, Record); Record.clear(); } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 3b4ece9..909f22f 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -927,6 +927,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { } AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { + // FIXME: Remove this in 4.0. if (!Val) return *this; for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; @@ -967,44 +968,3 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { return AttributeSet::get(Ty->getContext(), Index, Incompatible); } - -/// \brief This returns an integer containing an encoding of all the LLVM -/// attributes found in the given attribute bitset. Any change to this encoding -/// is a breaking change to bitcode compatibility. -/// N.B. This should be used only by the bitcode reader! -uint64_t AttributeFuncs::encodeLLVMAttributesForBitcode(AttributeSet Attrs, - unsigned Index) { - // FIXME: It doesn't make sense to store the alignment information as an - // expanded out value, we should store it as a log2 value. However, we can't - // just change that here without breaking bitcode compatibility. If this ever - // becomes a problem in practice, we should introduce new tag numbers in the - // bitcode file and have those tags use a more efficiently encoded alignment - // field. - - // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit - // log2 encoded value. Shift the bits above the alignment up by 11 bits. - uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; - if (Attrs.hasAttribute(Index, Attribute::Alignment)) - EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; - EncodedAttrs |= (Attrs.Raw(Index) & (0xffffULL << 21)) << 11; - return EncodedAttrs; -} - -/// \brief This fills an AttrBuilder object with the LLVM attributes that have -/// been decoded from the given integer. This function must stay in sync with -/// 'encodeLLVMAttributesForBitcode'. -/// N.B. This should be used only by the bitcode reader! -void AttributeFuncs::decodeLLVMAttributesForBitcode(LLVMContext &C, - AttrBuilder &B, - uint64_t EncodedAttrs) { - // The alignment is stored as a 16-bit raw value from bits 31--16. We shift - // the bits above 31 down by 11 bits. 
- unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; - assert((!Alignment || isPowerOf2_32(Alignment)) && - "Alignment must be a power of two."); - - if (Alignment) - B.addAlignmentAttr(Alignment); - B.addRawValue(((EncodedAttrs & (0xffffULL << 32)) >> 11) | - (EncodedAttrs & 0xffff)); -} -- cgit v1.1 From 91b978e15774039f991cf91048dc4cc74726744b Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Mon, 4 Feb 2013 23:45:08 +0000 Subject: [Stack Alignment] emit warning instead of a hard error Per discussion in rdar://13127907, we should emit a hard error only if people write code where the requested alignment is larger than achievable and the code assumes the low bits are zero. A warning should be good enough when we are not sure whether the source code assumes the low bits are zero. rdar://13127907 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineFunction.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 3d7d20d..4a9a62a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -479,11 +479,11 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned PrefAlign, if (!ShouldClamp || PrefAlign <= StackAlign) return PrefAlign; if (Alloca && MinAlign > StackAlign) - Alloca->getParent()->getContext().emitError(Alloca, - "Requested Minimal Alignment exceeds the Stack Alignment!"); + Alloca->getParent()->getContext().emitWarning(Alloca, + "Requested alignment exceeds the stack alignment!"); else assert(MinAlign <= StackAlign && - "Requested Minimal Alignment exceeds the Stack Alignment!"); + "Requested alignment exceeds the stack alignment!"); return StackAlign; } -- cgit v1.1 From a2de37c897556fbd1f94a3ed84ad27accd8f8deb Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 5 Feb 2013 05:57:38 +0000 Subject: Added LLVM Asm/Bitcode Reader/Writer support for the new IR keyword externally_initialized.
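Since the keyword is new, a usage sketch may help. The flag rides on the tail of GlobalVariable's constructor, exactly as the BitcodeReader hunk below uses it. Assuming the llvm/IR headers of this period, creating "@g = externally_initialized global i32 0" from C++ looks roughly like this (function and variable names are illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// The initializer is only a placeholder: some external agent may overwrite
// the memory before execution, so optimizers must not propagate the 0 out.
static GlobalVariable *createExternallyInitializedGlobal(Module &M) {
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  return new GlobalVariable(M, Int32Ty, /*isConstant=*/false,
                            GlobalValue::ExternalLinkage,
                            ConstantInt::get(Int32Ty, 0), "g",
                            /*InsertBefore=*/0,
                            GlobalVariable::NotThreadLocal,
                            /*AddressSpace=*/0,
                            /*isExternallyInitialized=*/true);
}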
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174340 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 13 ++++++++++--- lib/AsmParser/LLToken.h | 1 + lib/Bitcode/Reader/BitcodeReader.cpp | 6 +++++- lib/Bitcode/Writer/BitcodeWriter.cpp | 3 ++- lib/IR/AsmWriter.cpp | 1 + 6 files changed, 20 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 85e7574..2c806be 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -463,6 +463,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(hidden); KEYWORD(protected); KEYWORD(unnamed_addr); + KEYWORD(externally_initialized); KEYWORD(extern_weak); KEYWORD(external); KEYWORD(thread_local); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 4910222..2b6b165 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -632,9 +632,11 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, /// ParseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal -/// OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const +/// OptionalAddrSpace OptionalUnNammedAddr +/// OptionalExternallyInitialized GlobalType Type Const /// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal -/// OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const +/// OptionalAddrSpace OptionalUnNammedAddr +/// OptionalExternallyInitialized GlobalType Type Const /// /// Everything through visibility has been parsed already. /// @@ -642,9 +644,10 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, unsigned Visibility) { unsigned AddrSpace; - bool IsConstant, UnnamedAddr; + bool IsConstant, UnnamedAddr, IsExternallyInitialized; GlobalVariable::ThreadLocalMode TLM; LocTy UnnamedAddrLoc; + LocTy IsExternallyInitializedLoc; LocTy TyLoc; Type *Ty = 0; @@ -652,6 +655,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || + ParseOptionalToken(lltok::kw_externally_initialized, + IsExternallyInitialized, + &IsExternallyInitializedLoc) || ParseGlobalType(IsConstant) || ParseType(Ty, TyLoc)) return true; @@ -709,6 +715,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, GV->setConstant(IsConstant); GV->setLinkage((GlobalValue::LinkageTypes)Linkage); GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); + GV->setExternallyInitialized(IsExternallyInitialized); GV->setThreadLocalMode(TLM); GV->setUnnamedAddr(UnnamedAddr); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index fea5f75..c9ecd21 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -44,6 +44,7 @@ namespace lltok { kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_unnamed_addr, + kw_externally_initialized, kw_extern_weak, kw_external, kw_thread_local, kw_localdynamic, kw_initialexec, kw_localexec, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 6ab7011..e10e6d6 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1596,9 +1596,13 @@ bool BitcodeReader::ParseModule(bool Resume) { if (Record.size() > 8) UnnamedAddr = Record[8]; + bool ExternallyInitialized = false; + if (Record.size() > 9) + ExternallyInitialized = Record[9]; + 
GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, - TLM, AddressSpace); + TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index c6d0371..74bbaf2 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -514,10 +514,11 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0); if (GV->isThreadLocal() || GV->getVisibility() != GlobalValue::DefaultVisibility || - GV->hasUnnamedAddr()) { + GV->hasUnnamedAddr() || GV->isExternallyInitialized()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(GV->hasUnnamedAddr()); + Vals.push_back(GV->isExternallyInitialized()); } else { AbbrevToUse = SimpleGVarAbbrev; } diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index ff43128..11e06c9 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1443,6 +1443,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (unsigned AddressSpace = GV->getType()->getAddressSpace()) Out << "addrspace(" << AddressSpace << ") "; if (GV->hasUnnamedAddr()) Out << "unnamed_addr "; + if (GV->isExternallyInitialized()) Out << "externally_initialized "; Out << (GV->isConstant() ? "constant " : "global "); TypePrinter.print(GV->getType()->getElementType(), Out); -- cgit v1.1 From 429f7ef0c116c0504052b9a6655ef4d973177e9d Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 5 Feb 2013 06:25:30 +0000 Subject: When the target-independent DAGCombiner inferred a higher alignment for a load, it would replace the load with one with the higher alignment. However, it did not place the new load in the worklist, which prevented later DAG combines in the same phase (for example, target-specific combines) from ever seeing it. This patch corrects that oversight, and updates some tests whose output changed due to slightly different DAGCombine outputs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174343 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 79ec227..39a8e82 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7199,12 +7199,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + if (Align > LD->getAlignment()) { + SDValue NewLoad = + DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + } } } -- cgit v1.1 From ae510f3936f2510cebff86ea38536b30fdf2a30c Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 5 Feb 2013 07:19:31 +0000 Subject: Use the do-while(0) thing for this #define. 
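Before the hunk itself, a quick refresher on why the do-while(0) idiom matters for statement-like macros — a generic illustration, not the LLLexer macro:

// Expanding to a bare if-statement breaks if/else nesting at the call site:
// in "if (c) RETURN_IF_EQ_BAD(...); else ...", the semicolon that ends the
// macro call terminates the outer if, and the else no longer parses.
#define RETURN_IF_EQ_BAD(Len, Str) \
  if ((Len) == sizeof(Str) - 1) return true

// Wrapping the body in do { } while (0) consumes exactly one trailing
// semicolon and keeps the expansion a single statement.
#define RETURN_IF_EQ(Len, Str) \
  do { \
    if ((Len) == sizeof(Str) - 1) return true; \
  } while (0)

bool matches(unsigned Len) {
  if (Len > 0)
    RETURN_IF_EQ(Len, "true"); // expands to one well-formed statement
  else
    return false;
  return false;
}

With the bad variant, the else in matches() would be a syntax error; the do-while(0) form makes KEYWORD-style macros like the one in the hunk below safe anywhere a single statement is expected.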
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174347 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 2c806be..72136d0 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -436,9 +436,11 @@ lltok::Kind LLLexer::LexIdentifier() { CurPtr = KeywordEnd; --StartChar; unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR) \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ - return lltok::kw_##STR; +#define KEYWORD(STR) \ + do { \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ + return lltok::kw_##STR; \ + } while (0) KEYWORD(true); KEYWORD(false); KEYWORD(declare); KEYWORD(define); @@ -490,11 +492,11 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(seq_cst); KEYWORD(singlethread); - KEYWORD(nnan) - KEYWORD(ninf) - KEYWORD(nsz) - KEYWORD(arcp) - KEYWORD(fast) + KEYWORD(nnan); + KEYWORD(ninf); + KEYWORD(nsz); + KEYWORD(arcp); + KEYWORD(fast); KEYWORD(nuw); KEYWORD(nsw); KEYWORD(exact); -- cgit v1.1 From 208cc6f48975320da1a71cf7f7eff497f4f43b75 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 5 Feb 2013 07:31:55 +0000 Subject: Add support for emitting a stub DW_AT_GNU_dwo_id as part of the DWARF5 split dwarf proposal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 13ad353..6b19c5a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -685,6 +685,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!FirstCU) FirstCU = NewCU; + + // This should be a unique identifier when we want to build .dwp files. + if (useSplitDwarf()) + NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + if (useSplitDwarf() && !SkeletonCU) SkeletonCU = constructSkeletonCU(N); @@ -2450,7 +2455,8 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { StringRef FN = sys::path::filename(T); NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN); - // FIXME: We also need DW_AT_dwo_id. + // This should be a unique identifier when we want to build .dwp files. + NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); // FIXME: The addr base should be relative for each compile unit, however, // this one is going to be 0 anyhow. -- cgit v1.1 From 31fb5dafce280ddcc945e2136d11e5c69a3d3493 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 5 Feb 2013 07:32:03 +0000 Subject: Construct a skeleton cu for each compile unit in the module, not just for the first compile unit. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6b19c5a..967c149 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -686,12 +686,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!FirstCU) FirstCU = NewCU; - // This should be a unique identifier when we want to build .dwp files. - if (useSplitDwarf()) + if (useSplitDwarf()) { + // This should be a unique identifier when we want to build .dwp files. NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); - - if (useSplitDwarf() && !SkeletonCU) - SkeletonCU = constructSkeletonCU(N); + // Now construct the skeleton CU associated. + constructSkeletonCU(N); + } InfoHolder.addUnit(NewCU); @@ -2477,6 +2477,9 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + if (!SkeletonCU) + SkeletonCU = NewCU; + SkeletonHolder.addUnit(NewCU); return NewCU; -- cgit v1.1 From 9c5b94b6be08afe22b576d007353a0002603cef1 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 5 Feb 2013 07:47:41 +0000 Subject: This patch changes a static_cast to dyn_cast for MipsELFStreamer objects. Contributor: Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174354 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 6 +++++- lib/Target/Mips/MipsAsmPrinter.cpp | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 0d067e0..360dbe1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -19,12 +19,16 @@ public: MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) - : MCELFStreamer(Context, TAB, OS, Emitter) { + : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) { } ~MipsELFStreamer() {} void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget); // void emitELFHeaderFlagCG(unsigned Val); + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_MipsELFStreamer; + } }; MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index df6baaf..07a4faa 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -551,8 +551,8 @@ void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { // Emit Mips ELF register info Subtarget->getMReginfo().emitMipsReginfoSectionCG( OutStreamer, getObjFileLowering(), *Subtarget); - MipsELFStreamer & MES = static_cast(OutStreamer); - MES.emitELFHeaderFlagsCG(*Subtarget); + if (MipsELFStreamer *MES = dyn_cast(&OutStreamer)) + MES->emitELFHeaderFlagsCG(*Subtarget); } MachineLocation -- cgit v1.1 From ea59f896a672c2e1ef9f02277bce60257aa60989 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 5 Feb 2013 08:09:32 +0000 Subject: Add target-dependent versions of addAttribute/removeAttribute to AttrBuilder.
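The static_cast-to-dyn_cast change just above is only safe because MipsELFStreamer now carries a streamer kind and a classof predicate, which is what llvm::isa/cast/dyn_cast consult. Here is a stripped-down sketch of that pattern with hypothetical class names (the Casting.h machinery itself is real):

#include "llvm/Support/Casting.h"

struct Streamer {
  enum StreamerKind { SK_Base, SK_Target };
  explicit Streamer(StreamerKind K) : Kind(K) {}
  StreamerKind getKind() const { return Kind; }
private:
  StreamerKind Kind;
};

struct TargetStreamer : Streamer {
  TargetStreamer() : Streamer(SK_Target) {}
  // Hook for isa/cast/dyn_cast: true iff S really is a TargetStreamer.
  static bool classof(const Streamer *S) { return S->getKind() == SK_Target; }
  void emitExtraFlags() {}
};

void finish(Streamer &S) {
  // static_cast<TargetStreamer &>(S) would be undefined behaviour whenever S
  // is some other streamer; dyn_cast checks classof and yields null instead.
  if (TargetStreamer *TS = llvm::dyn_cast<TargetStreamer>(&S))
    TS->emitExtraFlags();
}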
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 909f22f..8a0551c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -46,7 +46,7 @@ Attribute Attribute::get(LLVMContext &Context, Constant *Kind, Constant *Val) { pImpl->AttrsSet.InsertNode(PA, InsertPoint); } - // Return the AttributesList that we found or created. + // Return the Attribute that we found or created. return Attribute(PA); } @@ -826,6 +826,11 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { return *this; } +AttrBuilder &AttrBuilder::addAttribute(StringRef A, StringRef V) { + TargetDepAttrs[A] = V; + return *this; +} + AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { Attrs.erase(Val); @@ -861,6 +866,13 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { return *this; } +AttrBuilder &AttrBuilder::removeAttribute(StringRef A) { + std::map::iterator I = TargetDepAttrs.find(A); + if (I != TargetDepAttrs.end()) + TargetDepAttrs.erase(I); + return *this; +} + AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { if (Align == 0) return *this; -- cgit v1.1 From 37ef65b9c1b93c386d13089d9ace6a1cc00e82dc Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 5 Feb 2013 08:32:10 +0000 Subject: This patch sets the EmitAlias flag in .td files and enables the instruction printer to print aliased instructions. Because RegisterOperands are used, a change in common code (utils/TableGen/AsmWriterEmitter.cpp) is required to get the correct register value if it is a RegisterOperand. Contributor: Vladimir Medic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174358 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 ++- lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 2 + lib/Target/Mips/Mips64InstrInfo.td | 33 +++++++++------- lib/Target/Mips/MipsInstrInfo.td | 52 ++++++++++++++----------- 4 files changed, 56 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 97c367f..fc23cd3 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define PRINT_ALIAS_INSTR #include "MipsGenAsmWriter.inc" const char* Mips::MipsFCCToString(Mips::CondCode CC) { @@ -78,7 +79,9 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << "\t.set\tmips32r2\n"; } - printInstruction(MI, O); + // Try to print any aliases first.
+ if (!printAliasInstr(MI, O)) + printInstruction(MI, O); printAnnotation(O, Annot); switch (MI->getOpcode()) { diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 38cac68..d1b561f 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -89,6 +89,8 @@ public: virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, raw_ostream &OS); + private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O); diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 2b2c6b7..db92c64 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -308,26 +308,33 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst, $src", (DADDu CPU64RegsOpnd:$dst, - CPU64RegsOpnd:$src,ZERO_64)>, +def : InstAlias<"move $dst, $src", + (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>, + Requires<[HasMips64]>; +def : InstAlias<"move $dst, $src", + (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 0>, Requires<[HasMips64]>; def : InstAlias<"and $rs, $rt, $imm", - (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, + (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), + 1>, Requires<[HasMips64]>; def : InstAlias<"slt $rs, $rt, $imm", - (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm)>, + (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm), 1>, Requires<[HasMips64]>; def : InstAlias<"xor $rs, $rt, $imm", - (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, + (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), + 1>, Requires<[HasMips64]>; def : InstAlias<"not $rt, $rs", - (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64)>, + (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>, Requires<[HasMips64]>; -def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs)>, Requires<[HasMips64]>; +def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>; def : InstAlias<"daddu $rs, $rt, $imm", - (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; + (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), + 1>; def : InstAlias<"dadd $rs, $rt, $imm", - (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; + (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), + 1>; /// Move between CPU and coprocessor registers @@ -348,11 +355,11 @@ def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel), // Two operand (implicit 0 selector) versions: def : InstAlias<"dmfc0 $rt, $rd", - (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0)>; + (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>; def : InstAlias<"dmtc0 $rt, $rd", - (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt)>; + (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>; def : InstAlias<"dmfc2 $rt, $rd", - (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0)>; + (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>; def : InstAlias<"dmtc2 $rt, $rd", - (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt)>; + (DMTC2_3OP64 CPU64RegsOpnd:$rd, 
0, CPU64RegsOpnd:$rt), 0>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index ac99a13..b563b8f 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -957,33 +957,41 @@ def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel), //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (ADDu CPURegsOpnd:$dst, - CPURegsOpnd:$src,ZERO)>, Requires<[NotMips64]>; -def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset)>; +def : InstAlias<"move $dst, $src", + (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>, + Requires<[NotMips64]>; +def : InstAlias<"move $dst, $src", + (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 0>, + Requires<[NotMips64]>; +def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>; def : InstAlias<"addu $rs, $rt, $imm", - (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; + (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; def : InstAlias<"add $rs, $rt, $imm", - (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; + (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; def : InstAlias<"and $rs, $rt, $imm", - (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; -def : InstAlias<"j $rs", (JR CPURegs:$rs)>, Requires<[NotMips64]>; -def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO)>; -def : InstAlias<"neg $rt, $rs", (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs)>; -def : InstAlias<"negu $rt, $rs", (SUBu CPURegsOpnd:$rt, ZERO, - CPURegsOpnd:$rs)>; + (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; +def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>, + Requires<[NotMips64]>; +def : InstAlias<"not $rt, $rs", + (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>; +def : InstAlias<"neg $rt, $rs", + (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>; +def : InstAlias<"negu $rt, $rs", + (SUBu CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>; def : InstAlias<"slt $rs, $rt, $imm", - (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm)>; + (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>; def : InstAlias<"xor $rs, $rt, $imm", - (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>, - Requires<[NotMips64]>; -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegsOpnd:$rt, - CPURegsOpnd:$rd, 0)>; -def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegsOpnd:$rd, 0, - CPURegsOpnd:$rt)>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegsOpnd:$rt, - CPURegsOpnd:$rd, 0)>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegsOpnd:$rd, 0, - CPURegsOpnd:$rt)>; + (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + Requires<[NotMips64]>; +def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; +def : InstAlias<"mfc0 $rt, $rd", + (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; +def : InstAlias<"mtc0 $rt, $rd", + (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; +def : InstAlias<"mfc2 $rt, $rd", + (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; +def : InstAlias<"mtc2 $rt, $rd", + (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions -- cgit v1.1 From 7304702ef99f98897d15baae0eede55f294bc602 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 5 Feb 2013 09:30:03 +0000 Subject: This patch sets the Mips ELF header flag for MicroMips architectures.
Contributor: Zoran Jovanovic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174360 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 3 +++ lib/Target/Mips/Mips.td | 3 +++ lib/Target/Mips/MipsSubtarget.cpp | 3 ++- lib/Target/Mips/MipsSubtarget.h | 4 ++++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 89891ff..9c454d6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -45,6 +45,9 @@ namespace llvm { else EFlags |= ELF::EF_MIPS_ARCH_32; + if (Subtarget.inMicroMipsMode()) + EFlags |= ELF::EF_MIPS_MICROMIPS; + // Relocation Model Reloc::Model RM = Subtarget.getRelocationModel(); if (RM == Reloc::PIC_ || RM == Reloc::Default) diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 23e2a94..1326623 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -80,6 +80,9 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">; def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true", "Mips DSP-R2 ASE", [FeatureDSP]>; +def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true", + "microMips mode">; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 6ad97db..75b4c98 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -32,7 +32,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), HasBitCount(false), HasFPIdx(false), - InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false), RM(_RM) + InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), + IsAndroid(false), RM(_RM) { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 63cde8d..32baa3d 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -89,6 +89,9 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // InMicroMips -- can process MicroMips instructions + bool InMicroMipsMode; + // HasDSP, HasDSPR2 -- supports DSP ASE. bool HasDSP, HasDSPR2; @@ -138,6 +141,7 @@ public: bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } bool inMips16Mode() const { return InMips16Mode; } + bool inMicroMipsMode() const { return InMicroMipsMode; } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } bool isAndroid() const { return IsAndroid; } -- cgit v1.1 From 19254c49a8752fe8c6fa648a6eb29f20a1f62c8b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 5 Feb 2013 13:24:47 +0000 Subject: Remove cyclic dependency in AArch64 libraries This moves the bit twiddling and string fiddling functions required by other parts of the backend into a separate library. Previously they resided in AArch64Desc, which created a circular dependency between various components.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174369 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ConstantIslandPass.cpp | 2 +- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.h | 2 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +- lib/Target/AArch64/AArch64MCInstLower.cpp | 2 +- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- lib/Target/AArch64/CMakeLists.txt | 1 + .../AArch64/Disassembler/AArch64Disassembler.cpp | 2 +- lib/Target/AArch64/Disassembler/LLVMBuild.txt | 2 +- .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 2 +- .../AArch64/InstPrinter/AArch64InstPrinter.h | 3 +- lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 2 +- lib/Target/AArch64/LLVMBuild.txt | 2 +- lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h | 779 -------------------- .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 3 +- .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 797 -------------------- lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/AArch64/Makefile | 2 +- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 812 +++++++++++++++++++++ lib/Target/AArch64/Utils/AArch64BaseInfo.h | 778 ++++++++++++++++++++ lib/Target/AArch64/Utils/CMakeLists.txt | 5 + lib/Target/AArch64/Utils/LLVMBuild.txt | 23 + lib/Target/AArch64/Utils/Makefile | 15 + 24 files changed, 1652 insertions(+), 1592 deletions(-) delete mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h create mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp create mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.h create mode 100644 lib/Target/AArch64/Utils/CMakeLists.txt create mode 100644 lib/Target/AArch64/Utils/LLVMBuild.txt create mode 100644 lib/Target/AArch64/Utils/Makefile (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp index 7734866..f5e5c64 100644 --- a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp +++ b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp @@ -19,7 +19,7 @@ #include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "AArch64MachineFunctionInfo.h" -#include "MCTargetDesc/AArch64BaseInfo.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 9be8ba1..28f152c 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -16,7 +16,7 @@ #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64BaseInfo.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APSInt.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/GlobalValue.h" diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 42e8f09..9b26b1f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18,7 +18,7 @@ #include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" -#include "MCTargetDesc/AArch64BaseInfo.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git 
a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 66985c1..ec4e432 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -15,7 +15,7 @@ #ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H #define LLVM_TARGET_AARCH64_ISELLOWERING_H -#include "MCTargetDesc/AArch64BaseInfo.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 967960c..d59f2f1 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -15,8 +15,8 @@ #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64BaseInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 0603574..c96bf85 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -14,8 +14,8 @@ #include "AArch64AsmPrinter.h" #include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64BaseInfo.h" #include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 49bec59..bc0f396 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64BaseInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/StringSwitch.h" diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index a89861f..d133b95 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -33,3 +33,4 @@ add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) add_subdirectory(TargetInfo) +add_subdirectory(Utils) \ No newline at end of file diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index a7fc352..017b509 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -12,7 +12,7 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" -#include "MCTargetDesc/AArch64BaseInfo.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt index 123eb3e..a93e343 100644 --- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = AArch64Disassembler parent = AArch64 -required_libraries = AArch64CodeGen 
AArch64Desc AArch64Info MC Support +required_libraries = AArch64CodeGen AArch64Desc AArch64Info AArch64Utils MC Support add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 909810f..82ce80c 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "asm-printer" #include "AArch64InstPrinter.h" -#include "MCTargetDesc/AArch64BaseInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 1890082..ec14595 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -14,7 +14,8 @@ #ifndef LLVM_AARCH64INSTPRINTER_H #define LLVM_AARCH64INSTPRINTER_H -#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt index 40fdc55..4836c7c 100644 --- a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt +++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = AArch64AsmPrinter parent = AArch64 -required_libraries = MC Support +required_libraries = AArch64Utils MC Support add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt index 09c7448..3b296fd 100644 --- a/lib/Target/AArch64/LLVMBuild.txt +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils [component_0] type = TargetGroup diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h deleted file mode 100644 index b71eb0d..0000000 --- a/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h +++ /dev/null @@ -1,779 +0,0 @@ -//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the AArch64 target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_BASEINFO_H -#define LLVM_AARCH64_BASEINFO_H - -#include "AArch64MCTargetDesc.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -// // Enums corresponding to AArch64 condition codes -namespace A64CC { - // The CondCodes constants map directly to the 4-bit encoding of the - // condition field for predicated instructions. 
- enum CondCodes { // Meaning (integer) Meaning (floating-point) - EQ = 0, // Equal Equal - NE, // Not equal Not equal, or unordered - HS, // Unsigned higher or same >, ==, or unordered - LO, // Unsigned lower Less than - MI, // Minus, negative Less than - PL, // Plus, positive or zero >, ==, or unordered - VS, // Overflow Unordered - VC, // No overflow Ordered - HI, // Unsigned higher Greater than, or unordered - LS, // Unsigned lower or same Less than or equal - GE, // Greater than or equal Greater than or equal - LT, // Less than Less than, or unordered - GT, // Signed greater than Greater than - LE, // Signed less than or equal <, ==, or unordered - AL, // Always (unconditional) Always (unconditional) - NV, // Always (unconditional) Always (unconditional) - // Note the NV exists purely to disassemble 0b1111. Execution - // is "always". - Invalid - }; - -} // namespace A64CC - -inline static const char *A64CondCodeToString(A64CC::CondCodes CC) { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case A64CC::EQ: return "eq"; - case A64CC::NE: return "ne"; - case A64CC::HS: return "hs"; - case A64CC::LO: return "lo"; - case A64CC::MI: return "mi"; - case A64CC::PL: return "pl"; - case A64CC::VS: return "vs"; - case A64CC::VC: return "vc"; - case A64CC::HI: return "hi"; - case A64CC::LS: return "ls"; - case A64CC::GE: return "ge"; - case A64CC::LT: return "lt"; - case A64CC::GT: return "gt"; - case A64CC::LE: return "le"; - case A64CC::AL: return "al"; - case A64CC::NV: return "nv"; - } -} - -inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) { - return StringSwitch<A64CC::CondCodes>(CondStr.lower()) - .Case("eq", A64CC::EQ) - .Case("ne", A64CC::NE) - .Case("hs", A64CC::HS) - .Case("cs", A64CC::HS) - .Case("lo", A64CC::LO) - .Case("cc", A64CC::LO) - .Case("mi", A64CC::MI) - .Case("pl", A64CC::PL) - .Case("vs", A64CC::VS) - .Case("vc", A64CC::VC) - .Case("hi", A64CC::HI) - .Case("ls", A64CC::LS) - .Case("ge", A64CC::GE) - .Case("lt", A64CC::LT) - .Case("gt", A64CC::GT) - .Case("le", A64CC::LE) - .Case("al", A64CC::AL) - .Case("nv", A64CC::NV) - .Default(A64CC::Invalid); -} - -inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) { - // It turns out that the condition codes have been designed so that in order - // to reverse the intent of the condition you only have to invert the low bit: - - return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1); -} - -/// Instances of this class can perform bidirectional mapping from random /// identifier strings to operand encodings. For example "MSR" takes a named /// system-register which must be encoded somehow and decoded for printing. This /// central location means that the information for those transformations is not /// duplicated and remains in sync. /// -/// FIXME: currently the algorithm is a completely unoptimised linear /// search. Obviously this could be improved, but we would probably want to work /// out just how often these instructions are emitted before working on it. It /// might even be optimal to just reorder the tables for the common instructions /// rather than changing the algorithm.
-struct NamedImmMapper { - struct Mapping { - const char *Name; - uint32_t Value; - }; - - template - NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) - : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} - - StringRef toString(uint32_t Value, bool &Valid) const; - uint32_t fromString(StringRef Name, bool &Valid) const; - - /// Many of the instructions allow an alternative assembly form consisting of - /// a simple immediate. Currently the only valid forms are ranges [0, N) where - /// N being 0 indicates no immediate syntax-form is allowed. - bool validImm(uint32_t Value) const; -protected: - const Mapping *Pairs; - size_t NumPairs; - uint32_t TooBigImm; -}; - -namespace A64AT { - enum ATValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - S1E1R = 0x43c0, // 01 000 0111 1000 000 - S1E2R = 0x63c0, // 01 100 0111 1000 000 - S1E3R = 0x73c0, // 01 110 0111 1000 000 - S1E1W = 0x43c1, // 01 000 0111 1000 001 - S1E2W = 0x63c1, // 01 100 0111 1000 001 - S1E3W = 0x73c1, // 01 110 0111 1000 001 - S1E0R = 0x43c2, // 01 000 0111 1000 010 - S1E0W = 0x43c3, // 01 000 0111 1000 011 - S12E1R = 0x63c4, // 01 100 0111 1000 100 - S12E1W = 0x63c5, // 01 100 0111 1000 101 - S12E0R = 0x63c6, // 01 100 0111 1000 110 - S12E0W = 0x63c7 // 01 100 0111 1000 111 - }; - - struct ATMapper : NamedImmMapper { - const static Mapping ATPairs[]; - - ATMapper(); - }; - -} -namespace A64DB { - enum DBValues { - Invalid = -1, - OSHLD = 0x1, - OSHST = 0x2, - OSH = 0x3, - NSHLD = 0x5, - NSHST = 0x6, - NSH = 0x7, - ISHLD = 0x9, - ISHST = 0xa, - ISH = 0xb, - LD = 0xd, - ST = 0xe, - SY = 0xf - }; - - struct DBarrierMapper : NamedImmMapper { - const static Mapping DBarrierPairs[]; - - DBarrierMapper(); - }; -} - -namespace A64DC { - enum DCValues { - Invalid = -1, // Op1 CRn CRm Op2 - ZVA = 0x5ba1, // 01 011 0111 0100 001 - IVAC = 0x43b1, // 01 000 0111 0110 001 - ISW = 0x43b2, // 01 000 0111 0110 010 - CVAC = 0x5bd1, // 01 011 0111 1010 001 - CSW = 0x43d2, // 01 000 0111 1010 010 - CVAU = 0x5bd9, // 01 011 0111 1011 001 - CIVAC = 0x5bf1, // 01 011 0111 1110 001 - CISW = 0x43f2 // 01 000 0111 1110 010 - }; - - struct DCMapper : NamedImmMapper { - const static Mapping DCPairs[]; - - DCMapper(); - }; - -} - -namespace A64IC { - enum ICValues { - Invalid = -1, // Op1 CRn CRm Op2 - IALLUIS = 0x0388, // 000 0111 0001 000 - IALLU = 0x03a8, // 000 0111 0101 000 - IVAU = 0x1ba9 // 011 0111 0101 001 - }; - - - struct ICMapper : NamedImmMapper { - const static Mapping ICPairs[]; - - ICMapper(); - }; - - static inline bool NeedsRegister(ICValues Val) { - return Val == IVAU; - } -} - -namespace A64ISB { - enum ISBValues { - Invalid = -1, - SY = 0xf - }; - struct ISBMapper : NamedImmMapper { - const static Mapping ISBPairs[]; - - ISBMapper(); - }; -} - -namespace A64PRFM { - enum PRFMValues { - Invalid = -1, - PLDL1KEEP = 0x00, - PLDL1STRM = 0x01, - PLDL2KEEP = 0x02, - PLDL2STRM = 0x03, - PLDL3KEEP = 0x04, - PLDL3STRM = 0x05, - PSTL1KEEP = 0x10, - PSTL1STRM = 0x11, - PSTL2KEEP = 0x12, - PSTL2STRM = 0x13, - PSTL3KEEP = 0x14, - PSTL3STRM = 0x15 - }; - - struct PRFMMapper : NamedImmMapper { - const static Mapping PRFMPairs[]; - - PRFMMapper(); - }; -} - -namespace A64PState { - enum PStateValues { - Invalid = -1, - SPSel = 0x05, - DAIFSet = 0x1e, - DAIFClr = 0x1f - }; - - struct PStateMapper : NamedImmMapper { - const static Mapping PStatePairs[]; - - PStateMapper(); - }; - -} - -namespace A64SE { - enum ShiftExtSpecifiers { - Invalid = -1, - LSL, - LSR, - ASR, - ROR, - - UXTB, - UXTH, - UXTW, - UXTX, - - SXTB, - SXTH, - SXTW, - 
SXTX - }; -} - -namespace A64SysReg { - enum SysRegROValues { - MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 - DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 - MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 - OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 - DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 - PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 - PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 - MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 - CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 - CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 - CTR_EL0 = 0xd801, // 11 011 0000 0000 001 - MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 - REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 - AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 - DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 - ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 - ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 - ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 - ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 - ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 - ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 - ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 - ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 - ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 - ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 - ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 - ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 - ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 - ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 - ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 - ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 - ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 - ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 - ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 - ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 - ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 - ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 - ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 - ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 - MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 - MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 - MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 - RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 - RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 - RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 - ISR_EL1 = 0xc608, // 11 000 1100 0001 000 - CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 - CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 - }; - - enum SysRegWOValues { - DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 - OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 - PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 - }; - - enum SysRegValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 - OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 - TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 - MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 - MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 - DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 - OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 - DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 - DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 - DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 - DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 - DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 - DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 - DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 - DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 - DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 - DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 - DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 - DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 - DBGBVR11_EL1 = 
0x805c, // 10 000 0000 1011 100 - DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 - DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 - DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 - DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 - DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 - DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 - DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 - DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 - DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 - DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 - DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 - DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 - DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 - DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 - DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 - DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 - DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 - DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 - DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 - DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 - DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 - DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 - DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 - DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 - DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 - DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 - DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 - DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 - DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 - DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 - DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 - DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 - DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 - DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 - DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 - DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 - DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 - DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 - DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 - DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 - DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 - DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 - DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 - DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 - DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 - DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 - DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 - DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 - DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 - DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 - DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 - DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 - TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 - OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 - DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 - DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 - DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 - CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 - VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 - VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 - CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 - SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 - SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 - SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 - ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 - ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 - ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 - HCR_EL2 = 0xe088, // 11 100 0001 0001 000 - SCR_EL3 = 0xf088, // 11 110 0001 0001 000 - MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 - SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 - CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 - CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 - HSTR_EL2 = 0xe08b, // 
11 100 0001 0001 011 - HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 - MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 - TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 - TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 - TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 - TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 - TCR_EL1 = 0xc102, // 11 000 0010 0000 010 - TCR_EL2 = 0xe102, // 11 100 0010 0000 010 - TCR_EL3 = 0xf102, // 11 110 0010 0000 010 - VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 - VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 - DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 - SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 - SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 - SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 - ELR_EL1 = 0xc201, // 11 000 0100 0000 001 - ELR_EL2 = 0xe201, // 11 100 0100 0000 001 - ELR_EL3 = 0xf201, // 11 110 0100 0000 001 - SP_EL0 = 0xc208, // 11 000 0100 0001 000 - SP_EL1 = 0xe208, // 11 100 0100 0001 000 - SP_EL2 = 0xf208, // 11 110 0100 0001 000 - SPSel = 0xc210, // 11 000 0100 0010 000 - NZCV = 0xda10, // 11 011 0100 0010 000 - DAIF = 0xda11, // 11 011 0100 0010 001 - CurrentEL = 0xc212, // 11 000 0100 0010 010 - SPSR_irq = 0xe218, // 11 100 0100 0011 000 - SPSR_abt = 0xe219, // 11 100 0100 0011 001 - SPSR_und = 0xe21a, // 11 100 0100 0011 010 - SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 - FPCR = 0xda20, // 11 011 0100 0100 000 - FPSR = 0xda21, // 11 011 0100 0100 001 - DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 - DLR_EL0 = 0xda29, // 11 011 0100 0101 001 - IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 - AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 - AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 - AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 - AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 - AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 - AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 - ESR_EL1 = 0xc290, // 11 000 0101 0010 000 - ESR_EL2 = 0xe290, // 11 100 0101 0010 000 - ESR_EL3 = 0xf290, // 11 110 0101 0010 000 - FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 - FAR_EL1 = 0xc300, // 11 000 0110 0000 000 - FAR_EL2 = 0xe300, // 11 100 0110 0000 000 - FAR_EL3 = 0xf300, // 11 110 0110 0000 000 - HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 - PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 - PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 - PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 - PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 - PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 - PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 - PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 - PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 - PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 - PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 - PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 - PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 - PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 - MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 - MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 - MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 - AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 - AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 - AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 - VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 - VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 - VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 - RMR_EL1 = 0xc602, // 11 000 1100 0000 010 - RMR_EL2 = 0xe602, // 11 100 1100 0000 010 - RMR_EL3 = 0xf602, // 11 110 1100 0000 010 - CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 - TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 - TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 - TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 - TPIDRRO_EL0 = 0xde83, 
// 11 011 1101 0000 011 - TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 - CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 - CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 - CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 - CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 - CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 - CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 - CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 - CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 - CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 - CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 - CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 - CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 - CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 - CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 - CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 - CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 - PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 - PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 - PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 - PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 - PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 - PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 - PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 - PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 - PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 - PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 - PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 - PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 - PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 - PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 - PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 - PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 - PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 - PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 - PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 - PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 - PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 - PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 - PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 - PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 - PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 - PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 - PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 - PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 - PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 - PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 - PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 - PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 - PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 - PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 - PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 - PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 - PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 - PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 - PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 - PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 - PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 - PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 - PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 - PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 - PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 - PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 - PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 - PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 - PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 - PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 - PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 - PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 - PMEVTYPER20_EL0 = 0xdf74, // 11 011 
1110 1110 100 - PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 - PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 - PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 - PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 - PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 - PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 - PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 - PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 - PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 - PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 - }; - - // Note that these do not inherit from NamedImmMapper. This class is - // sufficiently different in its behaviour that I don't believe it's worth - // burdening the common NamedImmMapper with abstractions only needed in - // this one case. - struct SysRegMapper { - static const NamedImmMapper::Mapping SysRegPairs[]; - - const NamedImmMapper::Mapping *InstPairs; - size_t NumInstPairs; - - SysRegMapper() {} - uint32_t fromString(StringRef Name, bool &Valid) const; - std::string toString(uint32_t Bits, bool &Valid) const; - }; - - struct MSRMapper : SysRegMapper { - static const NamedImmMapper::Mapping MSRPairs[]; - MSRMapper(); - }; - - struct MRSMapper : SysRegMapper { - static const NamedImmMapper::Mapping MRSPairs[]; - MRSMapper(); - }; - - uint32_t ParseGenericRegister(StringRef Name, bool &Valid); -} - -namespace A64TLBI { - enum TLBIValues { - Invalid = -1, // Op0 Op1 CRn CRm Op2 - IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 - IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 - VMALLE1IS = 0x4418, // 01 000 1000 0011 000 - ALLE2IS = 0x6418, // 01 100 1000 0011 000 - ALLE3IS = 0x7418, // 01 110 1000 0011 000 - VAE1IS = 0x4419, // 01 000 1000 0011 001 - VAE2IS = 0x6419, // 01 100 1000 0011 001 - VAE3IS = 0x7419, // 01 110 1000 0011 001 - ASIDE1IS = 0x441a, // 01 000 1000 0011 010 - VAAE1IS = 0x441b, // 01 000 1000 0011 011 - ALLE1IS = 0x641c, // 01 100 1000 0011 100 - VALE1IS = 0x441d, // 01 000 1000 0011 101 - VALE2IS = 0x641d, // 01 100 1000 0011 101 - VALE3IS = 0x741d, // 01 110 1000 0011 101 - VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 - VAALE1IS = 0x441f, // 01 000 1000 0011 111 - IPAS2E1 = 0x6421, // 01 100 1000 0100 001 - IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 - VMALLE1 = 0x4438, // 01 000 1000 0111 000 - ALLE2 = 0x6438, // 01 100 1000 0111 000 - ALLE3 = 0x7438, // 01 110 1000 0111 000 - VAE1 = 0x4439, // 01 000 1000 0111 001 - VAE2 = 0x6439, // 01 100 1000 0111 001 - VAE3 = 0x7439, // 01 110 1000 0111 001 - ASIDE1 = 0x443a, // 01 000 1000 0111 010 - VAAE1 = 0x443b, // 01 000 1000 0111 011 - ALLE1 = 0x643c, // 01 100 1000 0111 100 - VALE1 = 0x443d, // 01 000 1000 0111 101 - VALE2 = 0x643d, // 01 100 1000 0111 101 - VALE3 = 0x743d, // 01 110 1000 0111 101 - VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 - VAALE1 = 0x443f // 01 000 1000 0111 111 - }; - - struct TLBIMapper : NamedImmMapper { - const static Mapping TLBIPairs[]; - - TLBIMapper(); - }; - - static inline bool NeedsRegister(TLBIValues Val) { - switch (Val) { - case VMALLE1IS: - case ALLE2IS: - case ALLE3IS: - case ALLE1IS: - case VMALLS12E1IS: - case VMALLE1: - case ALLE2: - case ALLE3: - case ALLE1: - case VMALLS12E1: - return false; - default: - return true; - } - } -} - -namespace AArch64II { - - enum TOF { - //===--------------------------------------------------------------===// - // AArch64 Specific MachineOperand flags. - - MO_NO_FLAG, - - // MO_GOT - Represents a relocation referring to the GOT entry of a given - // symbol. Used in adrp. 
- MO_GOT, - - // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the - // GOT entry of a given symbol. Used in ldr only. - MO_GOT_LO12, - - // MO_DTPREL_* - Represents a relocation referring to the offset from a - // module's dynamic thread pointer. Used in the local-dynamic TLS access - // model. - MO_DTPREL_G1, - MO_DTPREL_G0_NC, - - // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry - // providing the offset of a variable from the thread-pointer. Used in - // initial-exec TLS model where this offset is assigned in the static thread - // block and thus known by the dynamic linker. - MO_GOTTPREL, - MO_GOTTPREL_LO12, - - // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing - // a TLS descriptor chosen by the dynamic linker. Used for the - // general-dynamic and local-dynamic TLS access models where very little is - // known at link-time. - MO_TLSDESC, - MO_TLSDESC_LO12, - - // MO_TPREL_* - Represents a relocation referring to the offset of a - // variable from the thread pointer itself. Used in the local-exec TLS - // access model. - MO_TPREL_G1, - MO_TPREL_G0_NC, - - // MO_LO12 - On a symbol operand, this represents a relocation containing - // lower 12 bits of the address. Used in add/sub/ldr/str. - MO_LO12 - }; -} - -class APFloat; - -namespace A64Imms { - bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); - - inline bool isFPImm(const APFloat &Val) { - uint32_t Imm8; - return isFPImm(Val, Imm8); - } - - bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); - bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); - - bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - // We sometimes want to know whether the immediate is representable with a - // MOVN but *not* with a MOVZ (because that would take priority).
- bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - -} - -} // end namespace llvm; - -#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index f2bbd85..ee77da2 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -12,9 +12,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "MCTargetDesc/AArch64BaseInfo.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 0d2855f..7960db0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -12,20 +12,16 @@ //===----------------------------------------------------------------------===// #include "AArch64MCTargetDesc.h" -#include "AArch64BaseInfo.h" #include "AArch64ELFStreamer.h" #include "AArch64MCAsmInfo.h" #include "InstPrinter/AArch64InstPrinter.h" -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Regex.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" @@ -40,799 +36,6 @@ using namespace llvm; -StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { - Valid = true; - return Pairs[i].Name; - } - } - - Valid = false; - return StringRef(); -} - -uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { - std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { - Valid = true; - return Pairs[i].Value; - } - } - - Valid = false; - return -1; -} - -bool NamedImmMapper::validImm(uint32_t Value) const { - return Value < TooBigImm; -} - -const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, -}; - -A64AT::ATMapper::ATMapper() - : NamedImmMapper(ATPairs, 0) {} - -const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} -}; - -A64DB::DBarrierMapper::DBarrierMapper() - : NamedImmMapper(DBarrierPairs, 16u) {} - -const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", CVAC}, - {"csw", CSW}, - {"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} -}; - -A64DC::DCMapper::DCMapper() - : NamedImmMapper(DCPairs, 0) {} - -const NamedImmMapper::Mapping 
A64IC::ICMapper::ICPairs[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} -}; - -A64IC::ICMapper::ICMapper() - : NamedImmMapper(ICPairs, 0) {} - -const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { - {"sy", SY}, -}; - -A64ISB::ISBMapper::ISBMapper() - : NamedImmMapper(ISBPairs, 16) {} - -const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { - {"pldl1keep", PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} -}; - -A64PRFM::PRFMMapper::PRFMMapper() - : NamedImmMapper(PRFMPairs, 32) {} - -const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} -}; - -A64PState::PStateMapper::PStateMapper() - : NamedImmMapper(PStatePairs, 0) {} - -const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - {"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, - {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, - {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, - {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0} -}; - -A64SysReg::MRSMapper::MRSMapper() { - InstPairs = &MRSPairs[0]; - NumInstPairs = llvm::array_lengthof(MRSPairs); -} - -const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { - {"dbgdtrtx_el0", DBGDTRTX_EL0}, - {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0} -}; - -A64SysReg::MSRMapper::MSRMapper() { - InstPairs = &MSRPairs[0]; - NumInstPairs = llvm::array_lengthof(MSRPairs); -} - - -const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { - {"osdtrrx_el1", OSDTRRX_EL1}, - {"osdtrtx_el1", OSDTRTX_EL1}, - {"teecr32_el1", TEECR32_EL1}, - {"mdccint_el1", MDCCINT_EL1}, - {"mdscr_el1", MDSCR_EL1}, - {"dbgdtr_el0", DBGDTR_EL0}, - {"oseccr_el1", OSECCR_EL1}, - {"dbgvcr32_el2", DBGVCR32_EL2}, - {"dbgbvr0_el1", DBGBVR0_EL1}, - {"dbgbvr1_el1", DBGBVR1_EL1}, - {"dbgbvr2_el1", DBGBVR2_EL1}, - {"dbgbvr3_el1", DBGBVR3_EL1}, - {"dbgbvr4_el1", 
DBGBVR4_EL1}, - {"dbgbvr5_el1", DBGBVR5_EL1}, - {"dbgbvr6_el1", DBGBVR6_EL1}, - {"dbgbvr7_el1", DBGBVR7_EL1}, - {"dbgbvr8_el1", DBGBVR8_EL1}, - {"dbgbvr9_el1", DBGBVR9_EL1}, - {"dbgbvr10_el1", DBGBVR10_EL1}, - {"dbgbvr11_el1", DBGBVR11_EL1}, - {"dbgbvr12_el1", DBGBVR12_EL1}, - {"dbgbvr13_el1", DBGBVR13_EL1}, - {"dbgbvr14_el1", DBGBVR14_EL1}, - {"dbgbvr15_el1", DBGBVR15_EL1}, - {"dbgbcr0_el1", DBGBCR0_EL1}, - {"dbgbcr1_el1", DBGBCR1_EL1}, - {"dbgbcr2_el1", DBGBCR2_EL1}, - {"dbgbcr3_el1", DBGBCR3_EL1}, - {"dbgbcr4_el1", DBGBCR4_EL1}, - {"dbgbcr5_el1", DBGBCR5_EL1}, - {"dbgbcr6_el1", DBGBCR6_EL1}, - {"dbgbcr7_el1", DBGBCR7_EL1}, - {"dbgbcr8_el1", DBGBCR8_EL1}, - {"dbgbcr9_el1", DBGBCR9_EL1}, - {"dbgbcr10_el1", DBGBCR10_EL1}, - {"dbgbcr11_el1", DBGBCR11_EL1}, - {"dbgbcr12_el1", DBGBCR12_EL1}, - {"dbgbcr13_el1", DBGBCR13_EL1}, - {"dbgbcr14_el1", DBGBCR14_EL1}, - {"dbgbcr15_el1", DBGBCR15_EL1}, - {"dbgwvr0_el1", DBGWVR0_EL1}, - {"dbgwvr1_el1", DBGWVR1_EL1}, - {"dbgwvr2_el1", DBGWVR2_EL1}, - {"dbgwvr3_el1", DBGWVR3_EL1}, - {"dbgwvr4_el1", DBGWVR4_EL1}, - {"dbgwvr5_el1", DBGWVR5_EL1}, - {"dbgwvr6_el1", DBGWVR6_EL1}, - {"dbgwvr7_el1", DBGWVR7_EL1}, - {"dbgwvr8_el1", DBGWVR8_EL1}, - {"dbgwvr9_el1", DBGWVR9_EL1}, - {"dbgwvr10_el1", DBGWVR10_EL1}, - {"dbgwvr11_el1", DBGWVR11_EL1}, - {"dbgwvr12_el1", DBGWVR12_EL1}, - {"dbgwvr13_el1", DBGWVR13_EL1}, - {"dbgwvr14_el1", DBGWVR14_EL1}, - {"dbgwvr15_el1", DBGWVR15_EL1}, - {"dbgwcr0_el1", DBGWCR0_EL1}, - {"dbgwcr1_el1", DBGWCR1_EL1}, - {"dbgwcr2_el1", DBGWCR2_EL1}, - {"dbgwcr3_el1", DBGWCR3_EL1}, - {"dbgwcr4_el1", DBGWCR4_EL1}, - {"dbgwcr5_el1", DBGWCR5_EL1}, - {"dbgwcr6_el1", DBGWCR6_EL1}, - {"dbgwcr7_el1", DBGWCR7_EL1}, - {"dbgwcr8_el1", DBGWCR8_EL1}, - {"dbgwcr9_el1", DBGWCR9_EL1}, - {"dbgwcr10_el1", DBGWCR10_EL1}, - {"dbgwcr11_el1", DBGWCR11_EL1}, - {"dbgwcr12_el1", DBGWCR12_EL1}, - {"dbgwcr13_el1", DBGWCR13_EL1}, - {"dbgwcr14_el1", DBGWCR14_EL1}, - {"dbgwcr15_el1", DBGWCR15_EL1}, - {"teehbr32_el1", TEEHBR32_EL1}, - {"osdlr_el1", OSDLR_EL1}, - {"dbgprcr_el1", DBGPRCR_EL1}, - {"dbgclaimset_el1", DBGCLAIMSET_EL1}, - {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, - {"csselr_el1", CSSELR_EL1}, - {"vpidr_el2", VPIDR_EL2}, - {"vmpidr_el2", VMPIDR_EL2}, - {"sctlr_el1", SCTLR_EL1}, - {"sctlr_el2", SCTLR_EL2}, - {"sctlr_el3", SCTLR_EL3}, - {"actlr_el1", ACTLR_EL1}, - {"actlr_el2", ACTLR_EL2}, - {"actlr_el3", ACTLR_EL3}, - {"cpacr_el1", CPACR_EL1}, - {"hcr_el2", HCR_EL2}, - {"scr_el3", SCR_EL3}, - {"mdcr_el2", MDCR_EL2}, - {"sder32_el3", SDER32_EL3}, - {"cptr_el2", CPTR_EL2}, - {"cptr_el3", CPTR_EL3}, - {"hstr_el2", HSTR_EL2}, - {"hacr_el2", HACR_EL2}, - {"mdcr_el3", MDCR_EL3}, - {"ttbr0_el1", TTBR0_EL1}, - {"ttbr0_el2", TTBR0_EL2}, - {"ttbr0_el3", TTBR0_EL3}, - {"ttbr1_el1", TTBR1_EL1}, - {"tcr_el1", TCR_EL1}, - {"tcr_el2", TCR_EL2}, - {"tcr_el3", TCR_EL3}, - {"vttbr_el2", VTTBR_EL2}, - {"vtcr_el2", VTCR_EL2}, - {"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - 
{"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - {"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - {"cntps_cval_el1", CNTPS_CVAL_EL1}, - {"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - {"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", 
PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, -}; - -uint32_t -A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { - // First search the registers shared by all - std::string NameLower = Name.lower(); - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Name == NameLower) { - Valid = true; - return SysRegPairs[i].Value; - } - } - - // Now try the instruction-specific registers (either read-only or - // write-only). - for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Name == NameLower) { - Valid = true; - return InstPairs[i].Value; - } - } - - // Try to parse an S____ register name, where the bits - // are: 11 xxx 1x11 xxxx xxx - Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$"); - - SmallVector Ops; - if (!GenericRegPattern.match(NameLower, &Ops)) { - Valid = false; - return -1; - } - - uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; - uint32_t Bits; - Ops[1].getAsInteger(10, Op1); - Ops[2].getAsInteger(10, CRn); - Ops[3].getAsInteger(10, CRm); - Ops[4].getAsInteger(10, Op2); - Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2; - - Valid = true; - return Bits; -} - -std::string -A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Value == Bits) { - Valid = true; - return SysRegPairs[i].Name; - } - } - - for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Value == Bits) { - Valid = true; - return InstPairs[i].Name; - } - } - - uint32_t Op0 = (Bits >> 14) & 0x3; - uint32_t Op1 = (Bits >> 11) & 0x7; - uint32_t CRn = (Bits >> 7) & 0xf; - uint32_t CRm = (Bits >> 3) & 0xf; - uint32_t Op2 = Bits & 0x7; - - // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic - // name. 
- if (Op0 != 3 || (CRn != 11 && CRn != 15)) { - Valid = false; - return ""; - } - - assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg"); - - Valid = true; - return "s3_" + utostr(Op1) + "_c" + utostr(CRn) - + "_c" + utostr(CRm) + "_" + utostr(Op2); -} - -const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { - {"ipas2e1is", IPAS2E1IS}, - {"ipas2le1is", IPAS2LE1IS}, - {"vmalle1is", VMALLE1IS}, - {"alle2is", ALLE2IS}, - {"alle3is", ALLE3IS}, - {"vae1is", VAE1IS}, - {"vae2is", VAE2IS}, - {"vae3is", VAE3IS}, - {"aside1is", ASIDE1IS}, - {"vaae1is", VAAE1IS}, - {"alle1is", ALLE1IS}, - {"vale1is", VALE1IS}, - {"vale2is", VALE2IS}, - {"vale3is", VALE3IS}, - {"vmalls12e1is", VMALLS12E1IS}, - {"vaale1is", VAALE1IS}, - {"ipas2e1", IPAS2E1}, - {"ipas2le1", IPAS2LE1}, - {"vmalle1", VMALLE1}, - {"alle2", ALLE2}, - {"alle3", ALLE3}, - {"vae1", VAE1}, - {"vae2", VAE2}, - {"vae3", VAE3}, - {"aside1", ASIDE1}, - {"vaae1", VAAE1}, - {"alle1", ALLE1}, - {"vale1", VALE1}, - {"vale2", VALE2}, - {"vale3", VALE3}, - {"vmalls12e1", VMALLS12E1}, - {"vaale1", VAALE1} -}; - -A64TLBI::TLBIMapper::TLBIMapper() - : NamedImmMapper(TLBIPairs, 0) {} - -bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { - const fltSemantics &Sem = Val.getSemantics(); - unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; - - uint32_t ExpMask; - switch (FracBits) { - case 10: // IEEE half-precision - ExpMask = 0x1f; - break; - case 23: // IEEE single-precision - ExpMask = 0xff; - break; - case 52: // IEEE double-precision - ExpMask = 0x7ff; - break; - case 112: // IEEE quad-precision - // No immediates are valid for double precision. - return false; - default: - llvm_unreachable("Only half, single and double precision supported"); - } - - uint32_t ExpStart = FracBits; - uint64_t FracMask = (1ULL << FracBits) - 1; - - uint32_t Sign = Val.isNegative(); - - uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); - uint64_t Fraction = Bits & FracMask; - int32_t Exponent = ((Bits >> ExpStart) & ExpMask); - Exponent -= ExpMask >> 1; - - // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) - // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) - // This translates to: only 4 bits of fraction; -3 <= exp <= 4. - uint64_t A64FracStart = FracBits - 4; - uint64_t A64FracMask = 0xf; - - // Are there too many fraction bits? - if (Fraction & ~(A64FracMask << A64FracStart)) - return false; - - if (Exponent < -3 || Exponent > 4) - return false; - - uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; - uint32_t PackedExp = (Exponent + 7) & 0x7; - - Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; - return true; -} - -// Encoding of the immediate for logical (immediate) instructions: -// -// | N | imms | immr | size | R | S | -// |---+--------+--------+------+--------------+--------------| -// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | -// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | -// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | -// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | -// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | -// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | -// | 0 | 11111x | - | | UNALLOCATED | | -// -// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in -// which the lower S+1 bits are ones and the remaining bits are zero, then -// rotated right by R bits, which is then replicated across the datapath. 
-// -// + Values of 'N', 'imms' and 'immr' which do not match the above table are -// RESERVED. -// + If all 's' bits in the imms field are set then the instruction is -// RESERVED. -// + The 'x' bits in the 'immr' field are IGNORED. - -bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { - int RepeatWidth; - int Rotation = 0; - int Num1s = 0; - - // Because there are S+1 ones in the replicated mask, an immediate of all - // zeros is not allowed. Filtering it here is probably more efficient. - if (Imm == 0) return false; - - for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { - uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; - uint64_t ReplicatedMask = Imm & RepeatMask; - - if (ReplicatedMask == 0) continue; - - // First we have to make sure the mask is actually repeated in each slot for - // this width-specifier. - bool IsReplicatedMask = true; - for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { - if (((Imm >> i) & RepeatMask) != ReplicatedMask) { - IsReplicatedMask = false; - break; - } - } - if (!IsReplicatedMask) continue; - - // Now we have to work out the amount of rotation needed. The first part of - // this calculation is actually independent of RepeatWidth, but the complex - // case will depend on it. - Rotation = CountTrailingZeros_64(Imm); - if (Rotation == 0) { - // There were no leading zeros, which means it's either in place or there - // are 1s at each end (e.g. 0x8003 needs rotating). - Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm) - : CountLeadingOnes_32(Imm); - Rotation = RepeatWidth - Rotation; - } - - uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); - // Of course, they may not actually be ones, so we have to check that: - if (!isMask_64(ReplicatedOnes)) - continue; - - Num1s = CountTrailingOnes_64(ReplicatedOnes); - - // We know we've got an almost valid encoding (certainly, if this is invalid - // no other parameters would work). - break; - } - - // The encodings which would produce all 1s are RESERVED. - if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; - - uint32_t N = RepeatWidth == 64; - uint32_t ImmR = RepeatWidth - Rotation; - uint32_t ImmS = Num1s - 1; - - switch (RepeatWidth) { - default: break; // No action required for other valid rotations. - case 16: ImmS |= 0x20; break; // 10ssss - case 8: ImmS |= 0x30; break; // 110sss - case 4: ImmS |= 0x38; break; // 1110ss - case 2: ImmS |= 0x3c; break; // 11110s - } - - Bits = ImmS | (ImmR << 6) | (N << 12); - - return true; -} - - -bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) { - uint32_t N = Bits >> 12; - uint32_t ImmR = (Bits >> 6) & 0x3f; - uint32_t ImmS = Bits & 0x3f; - - // N=1 encodes a 64-bit replication and is invalid for the 32-bit - // instructions. - if (RegWidth == 32 && N != 0) return false; - - int Width = 0; - if (N == 1) - Width = 64; - else if ((ImmS & 0x20) == 0) - Width = 32; - else if ((ImmS & 0x10) == 0) - Width = 16; - else if ((ImmS & 0x08) == 0) - Width = 8; - else if ((ImmS & 0x04) == 0) - Width = 4; - else if ((ImmS & 0x02) == 0) - Width = 2; - else { - // ImmS is 0b11111x: UNALLOCATED - return false; - } - - int Num1s = (ImmS & (Width - 1)) + 1; - - // All encodings which would map to -1 (signed) are RESERVED. - if (Num1s == Width) return false; - - int Rotation = (ImmR & (Width - 1)); - uint64_t Mask = (1ULL << Num1s) - 1; - uint64_t WidthMask = Width == 64 ? 
-1 : (1ULL << Width) - 1; - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); - - Imm = 0; - for (unsigned i = 0; i < RegWidth / Width; ++i) { - Imm |= Mask; - Mask <<= Width; - } - - return true; -} - -bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // If high bits are set then a 32-bit MOVZ can't possibly work. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - for (int i = 0; i < RegWidth; i += 16) { - // If the value is 0 when we mask out all the bits that could be set with - // the current LSL value then it's representable. - if ((Value & ~(0xffffULL << i)) == 0) { - Shift = i / 16; - UImm16 = (Value >> i) & 0xffff; - return true; - } - } - return false; -} - -bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // MOVN is defined to set its register to NOT(LSL(imm16, shift)). - - // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* - // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not - // a valid input for isMOVZImm. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value; - - return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); -} - -bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, - int &UImm16, int &Shift) { - if (isMOVZImm(RegWidth, Value, UImm16, Shift)) - return false; - - return isMOVNImm(RegWidth, Value, UImm16, Shift); -} - MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt index 5a2f467..37c8035 100644 --- a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = AArch64Desc parent = AArch64 -required_libraries = AArch64AsmPrinter MC Support +required_libraries = AArch64AsmPrinter AArch64Info MC Support add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile index b2ca278..641bb83 100644 --- a/lib/Target/AArch64/Makefile +++ b/lib/Target/AArch64/Makefile @@ -23,7 +23,7 @@ BUILT_SOURCES = AArch64GenAsmMatcher.inc \ AArch64GenRegisterInfo.inc \ AArch64GenSubtargetInfo.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp new file mode 100644 index 0000000..cf9a638 --- /dev/null +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -0,0 +1,812 @@ +//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides basic encoding and assembly information for AArch64. 
+// +//===----------------------------------------------------------------------===// +#include "AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Regex.h" + +using namespace llvm; + +StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Value == Value) { + Valid = true; + return Pairs[i].Name; + } + } + + Valid = false; + return StringRef(); +} + +uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { + std::string LowerCaseName = Name.lower(); + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Name == LowerCaseName) { + Valid = true; + return Pairs[i].Value; + } + } + + Valid = false; + return -1; +} + +bool NamedImmMapper::validImm(uint32_t Value) const { + return Value < TooBigImm; +} + +const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { + {"s1e1r", S1E1R}, + {"s1e2r", S1E2R}, + {"s1e3r", S1E3R}, + {"s1e1w", S1E1W}, + {"s1e2w", S1E2W}, + {"s1e3w", S1E3W}, + {"s1e0r", S1E0R}, + {"s1e0w", S1E0W}, + {"s12e1r", S12E1R}, + {"s12e1w", S12E1W}, + {"s12e0r", S12E0R}, + {"s12e0w", S12E0W}, +}; + +A64AT::ATMapper::ATMapper() + : NamedImmMapper(ATPairs, 0) {} + +const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { + {"oshld", OSHLD}, + {"oshst", OSHST}, + {"osh", OSH}, + {"nshld", NSHLD}, + {"nshst", NSHST}, + {"nsh", NSH}, + {"ishld", ISHLD}, + {"ishst", ISHST}, + {"ish", ISH}, + {"ld", LD}, + {"st", ST}, + {"sy", SY} +}; + +A64DB::DBarrierMapper::DBarrierMapper() + : NamedImmMapper(DBarrierPairs, 16u) {} + +const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { + {"zva", ZVA}, + {"ivac", IVAC}, + {"isw", ISW}, + {"cvac", CVAC}, + {"csw", CSW}, + {"cvau", CVAU}, + {"civac", CIVAC}, + {"cisw", CISW} +}; + +A64DC::DCMapper::DCMapper() + : NamedImmMapper(DCPairs, 0) {} + +const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { + {"ialluis", IALLUIS}, + {"iallu", IALLU}, + {"ivau", IVAU} +}; + +A64IC::ICMapper::ICMapper() + : NamedImmMapper(ICPairs, 0) {} + +const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { + {"sy", SY}, +}; + +A64ISB::ISBMapper::ISBMapper() + : NamedImmMapper(ISBPairs, 16) {} + +const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { + {"pldl1keep", PLDL1KEEP}, + {"pldl1strm", PLDL1STRM}, + {"pldl2keep", PLDL2KEEP}, + {"pldl2strm", PLDL2STRM}, + {"pldl3keep", PLDL3KEEP}, + {"pldl3strm", PLDL3STRM}, + {"pstl1keep", PSTL1KEEP}, + {"pstl1strm", PSTL1STRM}, + {"pstl2keep", PSTL2KEEP}, + {"pstl2strm", PSTL2STRM}, + {"pstl3keep", PSTL3KEEP}, + {"pstl3strm", PSTL3STRM} +}; + +A64PRFM::PRFMMapper::PRFMMapper() + : NamedImmMapper(PRFMPairs, 32) {} + +const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { + {"spsel", SPSel}, + {"daifset", DAIFSet}, + {"daifclr", DAIFClr} +}; + +A64PState::PStateMapper::PStateMapper() + : NamedImmMapper(PStatePairs, 0) {} + +const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { + {"mdccsr_el0", MDCCSR_EL0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0}, + {"mdrar_el1", MDRAR_EL1}, + {"oslsr_el1", OSLSR_EL1}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, + {"pmceid0_el0", PMCEID0_EL0}, + {"pmceid1_el0", PMCEID1_EL0}, + {"midr_el1", MIDR_EL1}, + {"ccsidr_el1", CCSIDR_EL1}, + {"clidr_el1", CLIDR_EL1}, + {"ctr_el0", CTR_EL0}, + {"mpidr_el1", MPIDR_EL1}, + {"revidr_el1", REVIDR_EL1}, + {"aidr_el1", AIDR_EL1}, + {"dczid_el0", DCZID_EL0}, + {"id_pfr0_el1", ID_PFR0_EL1}, + {"id_pfr1_el1", 
ID_PFR1_EL1}, + {"id_dfr0_el1", ID_DFR0_EL1}, + {"id_afr0_el1", ID_AFR0_EL1}, + {"id_mmfr0_el1", ID_MMFR0_EL1}, + {"id_mmfr1_el1", ID_MMFR1_EL1}, + {"id_mmfr2_el1", ID_MMFR2_EL1}, + {"id_mmfr3_el1", ID_MMFR3_EL1}, + {"id_isar0_el1", ID_ISAR0_EL1}, + {"id_isar1_el1", ID_ISAR1_EL1}, + {"id_isar2_el1", ID_ISAR2_EL1}, + {"id_isar3_el1", ID_ISAR3_EL1}, + {"id_isar4_el1", ID_ISAR4_EL1}, + {"id_isar5_el1", ID_ISAR5_EL1}, + {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, + {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, + {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, + {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, + {"mvfr0_el1", MVFR0_EL1}, + {"mvfr1_el1", MVFR1_EL1}, + {"mvfr2_el1", MVFR2_EL1}, + {"rvbar_el1", RVBAR_EL1}, + {"rvbar_el2", RVBAR_EL2}, + {"rvbar_el3", RVBAR_EL3}, + {"isr_el1", ISR_EL1}, + {"cntpct_el0", CNTPCT_EL0}, + {"cntvct_el0", CNTVCT_EL0} +}; + +A64SysReg::MRSMapper::MRSMapper() { + InstPairs = &MRSPairs[0]; + NumInstPairs = llvm::array_lengthof(MRSPairs); +} + +const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { + {"dbgdtrtx_el0", DBGDTRTX_EL0}, + {"oslar_el1", OSLAR_EL1}, + {"pmswinc_el0", PMSWINC_EL0} +}; + +A64SysReg::MSRMapper::MSRMapper() { + InstPairs = &MSRPairs[0]; + NumInstPairs = llvm::array_lengthof(MSRPairs); +} + + +const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { + {"osdtrrx_el1", OSDTRRX_EL1}, + {"osdtrtx_el1", OSDTRTX_EL1}, + {"teecr32_el1", TEECR32_EL1}, + {"mdccint_el1", MDCCINT_EL1}, + {"mdscr_el1", MDSCR_EL1}, + {"dbgdtr_el0", DBGDTR_EL0}, + {"oseccr_el1", OSECCR_EL1}, + {"dbgvcr32_el2", DBGVCR32_EL2}, + {"dbgbvr0_el1", DBGBVR0_EL1}, + {"dbgbvr1_el1", DBGBVR1_EL1}, + {"dbgbvr2_el1", DBGBVR2_EL1}, + {"dbgbvr3_el1", DBGBVR3_EL1}, + {"dbgbvr4_el1", DBGBVR4_EL1}, + {"dbgbvr5_el1", DBGBVR5_EL1}, + {"dbgbvr6_el1", DBGBVR6_EL1}, + {"dbgbvr7_el1", DBGBVR7_EL1}, + {"dbgbvr8_el1", DBGBVR8_EL1}, + {"dbgbvr9_el1", DBGBVR9_EL1}, + {"dbgbvr10_el1", DBGBVR10_EL1}, + {"dbgbvr11_el1", DBGBVR11_EL1}, + {"dbgbvr12_el1", DBGBVR12_EL1}, + {"dbgbvr13_el1", DBGBVR13_EL1}, + {"dbgbvr14_el1", DBGBVR14_EL1}, + {"dbgbvr15_el1", DBGBVR15_EL1}, + {"dbgbcr0_el1", DBGBCR0_EL1}, + {"dbgbcr1_el1", DBGBCR1_EL1}, + {"dbgbcr2_el1", DBGBCR2_EL1}, + {"dbgbcr3_el1", DBGBCR3_EL1}, + {"dbgbcr4_el1", DBGBCR4_EL1}, + {"dbgbcr5_el1", DBGBCR5_EL1}, + {"dbgbcr6_el1", DBGBCR6_EL1}, + {"dbgbcr7_el1", DBGBCR7_EL1}, + {"dbgbcr8_el1", DBGBCR8_EL1}, + {"dbgbcr9_el1", DBGBCR9_EL1}, + {"dbgbcr10_el1", DBGBCR10_EL1}, + {"dbgbcr11_el1", DBGBCR11_EL1}, + {"dbgbcr12_el1", DBGBCR12_EL1}, + {"dbgbcr13_el1", DBGBCR13_EL1}, + {"dbgbcr14_el1", DBGBCR14_EL1}, + {"dbgbcr15_el1", DBGBCR15_EL1}, + {"dbgwvr0_el1", DBGWVR0_EL1}, + {"dbgwvr1_el1", DBGWVR1_EL1}, + {"dbgwvr2_el1", DBGWVR2_EL1}, + {"dbgwvr3_el1", DBGWVR3_EL1}, + {"dbgwvr4_el1", DBGWVR4_EL1}, + {"dbgwvr5_el1", DBGWVR5_EL1}, + {"dbgwvr6_el1", DBGWVR6_EL1}, + {"dbgwvr7_el1", DBGWVR7_EL1}, + {"dbgwvr8_el1", DBGWVR8_EL1}, + {"dbgwvr9_el1", DBGWVR9_EL1}, + {"dbgwvr10_el1", DBGWVR10_EL1}, + {"dbgwvr11_el1", DBGWVR11_EL1}, + {"dbgwvr12_el1", DBGWVR12_EL1}, + {"dbgwvr13_el1", DBGWVR13_EL1}, + {"dbgwvr14_el1", DBGWVR14_EL1}, + {"dbgwvr15_el1", DBGWVR15_EL1}, + {"dbgwcr0_el1", DBGWCR0_EL1}, + {"dbgwcr1_el1", DBGWCR1_EL1}, + {"dbgwcr2_el1", DBGWCR2_EL1}, + {"dbgwcr3_el1", DBGWCR3_EL1}, + {"dbgwcr4_el1", DBGWCR4_EL1}, + 
{"dbgwcr5_el1", DBGWCR5_EL1}, + {"dbgwcr6_el1", DBGWCR6_EL1}, + {"dbgwcr7_el1", DBGWCR7_EL1}, + {"dbgwcr8_el1", DBGWCR8_EL1}, + {"dbgwcr9_el1", DBGWCR9_EL1}, + {"dbgwcr10_el1", DBGWCR10_EL1}, + {"dbgwcr11_el1", DBGWCR11_EL1}, + {"dbgwcr12_el1", DBGWCR12_EL1}, + {"dbgwcr13_el1", DBGWCR13_EL1}, + {"dbgwcr14_el1", DBGWCR14_EL1}, + {"dbgwcr15_el1", DBGWCR15_EL1}, + {"teehbr32_el1", TEEHBR32_EL1}, + {"osdlr_el1", OSDLR_EL1}, + {"dbgprcr_el1", DBGPRCR_EL1}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, + {"csselr_el1", CSSELR_EL1}, + {"vpidr_el2", VPIDR_EL2}, + {"vmpidr_el2", VMPIDR_EL2}, + {"sctlr_el1", SCTLR_EL1}, + {"sctlr_el2", SCTLR_EL2}, + {"sctlr_el3", SCTLR_EL3}, + {"actlr_el1", ACTLR_EL1}, + {"actlr_el2", ACTLR_EL2}, + {"actlr_el3", ACTLR_EL3}, + {"cpacr_el1", CPACR_EL1}, + {"hcr_el2", HCR_EL2}, + {"scr_el3", SCR_EL3}, + {"mdcr_el2", MDCR_EL2}, + {"sder32_el3", SDER32_EL3}, + {"cptr_el2", CPTR_EL2}, + {"cptr_el3", CPTR_EL3}, + {"hstr_el2", HSTR_EL2}, + {"hacr_el2", HACR_EL2}, + {"mdcr_el3", MDCR_EL3}, + {"ttbr0_el1", TTBR0_EL1}, + {"ttbr0_el2", TTBR0_EL2}, + {"ttbr0_el3", TTBR0_EL3}, + {"ttbr1_el1", TTBR1_EL1}, + {"tcr_el1", TCR_EL1}, + {"tcr_el2", TCR_EL2}, + {"tcr_el3", TCR_EL3}, + {"vttbr_el2", VTTBR_EL2}, + {"vtcr_el2", VTCR_EL2}, + {"dacr32_el2", DACR32_EL2}, + {"spsr_el1", SPSR_EL1}, + {"spsr_el2", SPSR_EL2}, + {"spsr_el3", SPSR_EL3}, + {"elr_el1", ELR_EL1}, + {"elr_el2", ELR_EL2}, + {"elr_el3", ELR_EL3}, + {"sp_el0", SP_EL0}, + {"sp_el1", SP_EL1}, + {"sp_el2", SP_EL2}, + {"spsel", SPSel}, + {"nzcv", NZCV}, + {"daif", DAIF}, + {"currentel", CurrentEL}, + {"spsr_irq", SPSR_irq}, + {"spsr_abt", SPSR_abt}, + {"spsr_und", SPSR_und}, + {"spsr_fiq", SPSR_fiq}, + {"fpcr", FPCR}, + {"fpsr", FPSR}, + {"dspsr_el0", DSPSR_EL0}, + {"dlr_el0", DLR_EL0}, + {"ifsr32_el2", IFSR32_EL2}, + {"afsr0_el1", AFSR0_EL1}, + {"afsr0_el2", AFSR0_EL2}, + {"afsr0_el3", AFSR0_EL3}, + {"afsr1_el1", AFSR1_EL1}, + {"afsr1_el2", AFSR1_EL2}, + {"afsr1_el3", AFSR1_EL3}, + {"esr_el1", ESR_EL1}, + {"esr_el2", ESR_EL2}, + {"esr_el3", ESR_EL3}, + {"fpexc32_el2", FPEXC32_EL2}, + {"far_el1", FAR_EL1}, + {"far_el2", FAR_EL2}, + {"far_el3", FAR_EL3}, + {"hpfar_el2", HPFAR_EL2}, + {"par_el1", PAR_EL1}, + {"pmcr_el0", PMCR_EL0}, + {"pmcntenset_el0", PMCNTENSET_EL0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0}, + {"pmovsclr_el0", PMOVSCLR_EL0}, + {"pmselr_el0", PMSELR_EL0}, + {"pmccntr_el0", PMCCNTR_EL0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0}, + {"pmuserenr_el0", PMUSERENR_EL0}, + {"pmintenset_el1", PMINTENSET_EL1}, + {"pmintenclr_el1", PMINTENCLR_EL1}, + {"pmovsset_el0", PMOVSSET_EL0}, + {"mair_el1", MAIR_EL1}, + {"mair_el2", MAIR_EL2}, + {"mair_el3", MAIR_EL3}, + {"amair_el1", AMAIR_EL1}, + {"amair_el2", AMAIR_EL2}, + {"amair_el3", AMAIR_EL3}, + {"vbar_el1", VBAR_EL1}, + {"vbar_el2", VBAR_EL2}, + {"vbar_el3", VBAR_EL3}, + {"rmr_el1", RMR_EL1}, + {"rmr_el2", RMR_EL2}, + {"rmr_el3", RMR_EL3}, + {"contextidr_el1", CONTEXTIDR_EL1}, + {"tpidr_el0", TPIDR_EL0}, + {"tpidr_el2", TPIDR_EL2}, + {"tpidr_el3", TPIDR_EL3}, + {"tpidrro_el0", TPIDRRO_EL0}, + {"tpidr_el1", TPIDR_EL1}, + {"cntfrq_el0", CNTFRQ_EL0}, + {"cntvoff_el2", CNTVOFF_EL2}, + {"cntkctl_el1", CNTKCTL_EL1}, + {"cnthctl_el2", CNTHCTL_EL2}, + {"cntp_tval_el0", CNTP_TVAL_EL0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2}, + {"cntps_tval_el1", CNTPS_TVAL_EL1}, + {"cntp_ctl_el0", CNTP_CTL_EL0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2}, + {"cntps_ctl_el1", CNTPS_CTL_EL1}, + {"cntp_cval_el0", CNTP_CVAL_EL0}, + {"cnthp_cval_el2", 
CNTHP_CVAL_EL2}, + {"cntps_cval_el1", CNTPS_CVAL_EL1}, + {"cntv_tval_el0", CNTV_TVAL_EL0}, + {"cntv_ctl_el0", CNTV_CTL_EL0}, + {"cntv_cval_el0", CNTV_CVAL_EL0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0}, + {"pmccfiltr_el0", PMCCFILTR_EL0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0}, + {"pmevtyper3_el0", PMEVTYPER3_EL0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0}, +}; + +uint32_t +A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { + // First search the registers shared by all + std::string NameLower = Name.lower(); + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Name == NameLower) { + Valid = true; + return SysRegPairs[i].Value; + } + } + + // Now try the instruction-specific registers (either read-only or + // write-only). 
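+  // For example (illustrative): "mdccsr_el0" is only found in the
+  // MRS-specific InstPairs table, while a generic name such as
+  // "s3_0_c11_c5_2" falls through to the pattern below and packs as
+  //   Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2
+  //        = (3 << 14) | (0 << 11) | (11 << 7) | (5 << 3) | 2 = 0xc5aa.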
+  for (unsigned i = 0; i < NumInstPairs; ++i) {
+    if (InstPairs[i].Name == NameLower) {
+      Valid = true;
+      return InstPairs[i].Value;
+    }
+  }
+
+  // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
+  // are: 11 xxx 1x11 xxxx xxx
+  Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+  SmallVector<StringRef, 5> Ops;
+  if (!GenericRegPattern.match(NameLower, &Ops)) {
+    Valid = false;
+    return -1;
+  }
+
+  uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+  uint32_t Bits;
+  Ops[1].getAsInteger(10, Op1);
+  Ops[2].getAsInteger(10, CRn);
+  Ops[3].getAsInteger(10, CRm);
+  Ops[4].getAsInteger(10, Op2);
+  Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+  Valid = true;
+  return Bits;
+}
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+  for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+    if (SysRegPairs[i].Value == Bits) {
+      Valid = true;
+      return SysRegPairs[i].Name;
+    }
+  }
+
+  for (unsigned i = 0; i < NumInstPairs; ++i) {
+    if (InstPairs[i].Value == Bits) {
+      Valid = true;
+      return InstPairs[i].Name;
+    }
+  }
+
+  uint32_t Op0 = (Bits >> 14) & 0x3;
+  uint32_t Op1 = (Bits >> 11) & 0x7;
+  uint32_t CRn = (Bits >> 7) & 0xf;
+  uint32_t CRm = (Bits >> 3) & 0xf;
+  uint32_t Op2 = Bits & 0x7;
+
+  // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+  // name.
+  if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+    Valid = false;
+    return "";
+  }
+
+  assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+  Valid = true;
+  return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+       + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+  {"ipas2e1is", IPAS2E1IS},
+  {"ipas2le1is", IPAS2LE1IS},
+  {"vmalle1is", VMALLE1IS},
+  {"alle2is", ALLE2IS},
+  {"alle3is", ALLE3IS},
+  {"vae1is", VAE1IS},
+  {"vae2is", VAE2IS},
+  {"vae3is", VAE3IS},
+  {"aside1is", ASIDE1IS},
+  {"vaae1is", VAAE1IS},
+  {"alle1is", ALLE1IS},
+  {"vale1is", VALE1IS},
+  {"vale2is", VALE2IS},
+  {"vale3is", VALE3IS},
+  {"vmalls12e1is", VMALLS12E1IS},
+  {"vaale1is", VAALE1IS},
+  {"ipas2e1", IPAS2E1},
+  {"ipas2le1", IPAS2LE1},
+  {"vmalle1", VMALLE1},
+  {"alle2", ALLE2},
+  {"alle3", ALLE3},
+  {"vae1", VAE1},
+  {"vae2", VAE2},
+  {"vae3", VAE3},
+  {"aside1", ASIDE1},
+  {"vaae1", VAAE1},
+  {"alle1", ALLE1},
+  {"vale1", VALE1},
+  {"vale2", VALE2},
+  {"vale3", VALE3},
+  {"vmalls12e1", VMALLS12E1},
+  {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+  : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+  const fltSemantics &Sem = Val.getSemantics();
+  unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+  uint32_t ExpMask;
+  switch (FracBits) {
+  case 10: // IEEE half-precision
+    ExpMask = 0x1f;
+    break;
+  case 23: // IEEE single-precision
+    ExpMask = 0xff;
+    break;
+  case 52: // IEEE double-precision
+    ExpMask = 0x7ff;
+    break;
+  case 112: // IEEE quad-precision
+    // No immediates are valid for quad precision.
+ return false; + default: + llvm_unreachable("Only half, single and double precision supported"); + } + + uint32_t ExpStart = FracBits; + uint64_t FracMask = (1ULL << FracBits) - 1; + + uint32_t Sign = Val.isNegative(); + + uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); + uint64_t Fraction = Bits & FracMask; + int32_t Exponent = ((Bits >> ExpStart) & ExpMask); + Exponent -= ExpMask >> 1; + + // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) + // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) + // This translates to: only 4 bits of fraction; -3 <= exp <= 4. + uint64_t A64FracStart = FracBits - 4; + uint64_t A64FracMask = 0xf; + + // Are there too many fraction bits? + if (Fraction & ~(A64FracMask << A64FracStart)) + return false; + + if (Exponent < -3 || Exponent > 4) + return false; + + uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; + uint32_t PackedExp = (Exponent + 7) & 0x7; + + Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; + return true; +} + +// Encoding of the immediate for logical (immediate) instructions: +// +// | N | imms | immr | size | R | S | +// |---+--------+--------+------+--------------+--------------| +// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | +// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | +// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | +// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | +// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | +// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | +// | 0 | 11111x | - | | UNALLOCATED | | +// +// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in +// which the lower S+1 bits are ones and the remaining bits are zero, then +// rotated right by R bits, which is then replicated across the datapath. +// +// + Values of 'N', 'imms' and 'immr' which do not match the above table are +// RESERVED. +// + If all 's' bits in the imms field are set then the instruction is +// RESERVED. +// + The 'x' bits in the 'immr' field are IGNORED. + +bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { + int RepeatWidth; + int Rotation = 0; + int Num1s = 0; + + // Because there are S+1 ones in the replicated mask, an immediate of all + // zeros is not allowed. Filtering it here is probably more efficient. + if (Imm == 0) return false; + + for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { + uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; + uint64_t ReplicatedMask = Imm & RepeatMask; + + if (ReplicatedMask == 0) continue; + + // First we have to make sure the mask is actually repeated in each slot for + // this width-specifier. + bool IsReplicatedMask = true; + for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { + if (((Imm >> i) & RepeatMask) != ReplicatedMask) { + IsReplicatedMask = false; + break; + } + } + if (!IsReplicatedMask) continue; + + // Now we have to work out the amount of rotation needed. The first part of + // this calculation is actually independent of RepeatWidth, but the complex + // case will depend on it. + Rotation = CountTrailingZeros_64(Imm); + if (Rotation == 0) { + // There were no leading zeros, which means it's either in place or there + // are 1s at each end (e.g. 0x8003 needs rotating). + Rotation = RegWidth == 64 ? 
CountLeadingOnes_64(Imm) + : CountLeadingOnes_32(Imm); + Rotation = RepeatWidth - Rotation; + } + + uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: + if (!isMask_64(ReplicatedOnes)) + continue; + + Num1s = CountTrailingOnes_64(ReplicatedOnes); + + // We know we've got an almost valid encoding (certainly, if this is invalid + // no other parameters would work). + break; + } + + // The encodings which would produce all 1s are RESERVED. + if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; + + uint32_t N = RepeatWidth == 64; + uint32_t ImmR = RepeatWidth - Rotation; + uint32_t ImmS = Num1s - 1; + + switch (RepeatWidth) { + default: break; // No action required for other valid rotations. + case 16: ImmS |= 0x20; break; // 10ssss + case 8: ImmS |= 0x30; break; // 110sss + case 4: ImmS |= 0x38; break; // 1110ss + case 2: ImmS |= 0x3c; break; // 11110s + } + + Bits = ImmS | (ImmR << 6) | (N << 12); + + return true; +} + + +bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) { + uint32_t N = Bits >> 12; + uint32_t ImmR = (Bits >> 6) & 0x3f; + uint32_t ImmS = Bits & 0x3f; + + // N=1 encodes a 64-bit replication and is invalid for the 32-bit + // instructions. + if (RegWidth == 32 && N != 0) return false; + + int Width = 0; + if (N == 1) + Width = 64; + else if ((ImmS & 0x20) == 0) + Width = 32; + else if ((ImmS & 0x10) == 0) + Width = 16; + else if ((ImmS & 0x08) == 0) + Width = 8; + else if ((ImmS & 0x04) == 0) + Width = 4; + else if ((ImmS & 0x02) == 0) + Width = 2; + else { + // ImmS is 0b11111x: UNALLOCATED + return false; + } + + int Num1s = (ImmS & (Width - 1)) + 1; + + // All encodings which would map to -1 (signed) are RESERVED. + if (Num1s == Width) return false; + + int Rotation = (ImmR & (Width - 1)); + uint64_t Mask = (1ULL << Num1s) - 1; + uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); + + Imm = 0; + for (unsigned i = 0; i < RegWidth / Width; ++i) { + Imm |= Mask; + Mask <<= Width; + } + + return true; +} + +bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // If high bits are set then a 32-bit MOVZ can't possibly work. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + for (int i = 0; i < RegWidth; i += 16) { + // If the value is 0 when we mask out all the bits that could be set with + // the current LSL value then it's representable. + if ((Value & ~(0xffffULL << i)) == 0) { + Shift = i / 16; + UImm16 = (Value >> i) & 0xffff; + return true; + } + } + return false; +} + +bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // MOVN is defined to set its register to NOT(LSL(imm16, shift)). + + // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* + // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not + // a valid input for isMOVZImm. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + uint64_t MOVZEquivalent = RegWidth == 32 ? 
~Value & 0xffffffff : ~Value; + + return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); +} + +bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, + int &UImm16, int &Shift) { + if (isMOVZImm(RegWidth, Value, UImm16, Shift)) + return false; + + return isMOVNImm(RegWidth, Value, UImm16, Shift); +} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h new file mode 100644 index 0000000..48e6c83 --- /dev/null +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -0,0 +1,778 @@ +//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the AArch64 target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_BASEINFO_H +#define LLVM_AARCH64_BASEINFO_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +// // Enums corresponding to AArch64 condition codes +namespace A64CC { + // The CondCodes constants map directly to the 4-bit encoding of the + // condition field for predicated instructions. + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ = 0, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Unsigned higher or same >, ==, or unordered + LO, // Unsigned lower or same Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Ordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Signed greater than Greater than + LE, // Signed less than or equal <, ==, or unordered + AL, // Always (unconditional) Always (unconditional) + NV, // Always (unconditional) Always (unconditional) + // Note the NV exists purely to disassemble 0b1111. Execution + // is "always". 
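+    // As an illustration of the encoding: EQ is 0b0000 and its logical
+    // inverse NE is 0b0001, so inverting a condition only needs the low
+    // bit flipped (see A64InvertCondCode below).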
+    Invalid
+  };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+  switch (CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case A64CC::EQ: return "eq";
+  case A64CC::NE: return "ne";
+  case A64CC::HS: return "hs";
+  case A64CC::LO: return "lo";
+  case A64CC::MI: return "mi";
+  case A64CC::PL: return "pl";
+  case A64CC::VS: return "vs";
+  case A64CC::VC: return "vc";
+  case A64CC::HI: return "hi";
+  case A64CC::LS: return "ls";
+  case A64CC::GE: return "ge";
+  case A64CC::LT: return "lt";
+  case A64CC::GT: return "gt";
+  case A64CC::LE: return "le";
+  case A64CC::AL: return "al";
+  case A64CC::NV: return "nv";
+  }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+  return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+             .Case("eq", A64CC::EQ)
+             .Case("ne", A64CC::NE)
+             .Case("hs", A64CC::HS)
+             .Case("cs", A64CC::HS)
+             .Case("lo", A64CC::LO)
+             .Case("cc", A64CC::LO)
+             .Case("mi", A64CC::MI)
+             .Case("pl", A64CC::PL)
+             .Case("vs", A64CC::VS)
+             .Case("vc", A64CC::VC)
+             .Case("hi", A64CC::HI)
+             .Case("ls", A64CC::LS)
+             .Case("ge", A64CC::GE)
+             .Case("lt", A64CC::LT)
+             .Case("gt", A64CC::GT)
+             .Case("le", A64CC::LE)
+             .Case("al", A64CC::AL)
+             .Case("nv", A64CC::NV)
+             .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+  // It turns out that the condition codes have been designed so that in order
+  // to reverse the intent of the condition you only have to invert the low bit:
+
+  return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
+struct NamedImmMapper {
+  struct Mapping {
+    const char *Name;
+    uint32_t Value;
+  };
+
+  template<int N>
+  NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+    : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+
+  StringRef toString(uint32_t Value, bool &Valid) const;
+  uint32_t fromString(StringRef Name, bool &Valid) const;
+
+  /// Many of the instructions allow an alternative assembly form consisting of
+  /// a simple immediate. Currently the only valid forms are ranges [0, N),
+  /// where an N of 0 indicates that no immediate syntax-form is allowed.
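+  ///
+  /// A minimal usage sketch (illustrative only; DBarrierMapper accepts
+  /// immediates in [0, 16)):
+  ///   bool Valid;
+  ///   A64DB::DBarrierMapper DBM;
+  ///   uint32_t Enc = DBM.fromString("ish", Valid);  // Valid == true, Enc == 0xb
+  ///   StringRef Name = DBM.toString(0xf, Valid);    // Valid == true, Name == "sy"
+  ///   bool InRange = DBM.validImm(12);              // true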
+ bool validImm(uint32_t Value) const; +protected: + const Mapping *Pairs; + size_t NumPairs; + uint32_t TooBigImm; +}; + +namespace A64AT { + enum ATValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + S1E1R = 0x43c0, // 01 000 0111 1000 000 + S1E2R = 0x63c0, // 01 100 0111 1000 000 + S1E3R = 0x73c0, // 01 110 0111 1000 000 + S1E1W = 0x43c1, // 01 000 0111 1000 001 + S1E2W = 0x63c1, // 01 100 0111 1000 001 + S1E3W = 0x73c1, // 01 110 0111 1000 001 + S1E0R = 0x43c2, // 01 000 0111 1000 010 + S1E0W = 0x43c3, // 01 000 0111 1000 011 + S12E1R = 0x63c4, // 01 100 0111 1000 100 + S12E1W = 0x63c5, // 01 100 0111 1000 101 + S12E0R = 0x63c6, // 01 100 0111 1000 110 + S12E0W = 0x63c7 // 01 100 0111 1000 111 + }; + + struct ATMapper : NamedImmMapper { + const static Mapping ATPairs[]; + + ATMapper(); + }; + +} +namespace A64DB { + enum DBValues { + Invalid = -1, + OSHLD = 0x1, + OSHST = 0x2, + OSH = 0x3, + NSHLD = 0x5, + NSHST = 0x6, + NSH = 0x7, + ISHLD = 0x9, + ISHST = 0xa, + ISH = 0xb, + LD = 0xd, + ST = 0xe, + SY = 0xf + }; + + struct DBarrierMapper : NamedImmMapper { + const static Mapping DBarrierPairs[]; + + DBarrierMapper(); + }; +} + +namespace A64DC { + enum DCValues { + Invalid = -1, // Op1 CRn CRm Op2 + ZVA = 0x5ba1, // 01 011 0111 0100 001 + IVAC = 0x43b1, // 01 000 0111 0110 001 + ISW = 0x43b2, // 01 000 0111 0110 010 + CVAC = 0x5bd1, // 01 011 0111 1010 001 + CSW = 0x43d2, // 01 000 0111 1010 010 + CVAU = 0x5bd9, // 01 011 0111 1011 001 + CIVAC = 0x5bf1, // 01 011 0111 1110 001 + CISW = 0x43f2 // 01 000 0111 1110 010 + }; + + struct DCMapper : NamedImmMapper { + const static Mapping DCPairs[]; + + DCMapper(); + }; + +} + +namespace A64IC { + enum ICValues { + Invalid = -1, // Op1 CRn CRm Op2 + IALLUIS = 0x0388, // 000 0111 0001 000 + IALLU = 0x03a8, // 000 0111 0101 000 + IVAU = 0x1ba9 // 011 0111 0101 001 + }; + + + struct ICMapper : NamedImmMapper { + const static Mapping ICPairs[]; + + ICMapper(); + }; + + static inline bool NeedsRegister(ICValues Val) { + return Val == IVAU; + } +} + +namespace A64ISB { + enum ISBValues { + Invalid = -1, + SY = 0xf + }; + struct ISBMapper : NamedImmMapper { + const static Mapping ISBPairs[]; + + ISBMapper(); + }; +} + +namespace A64PRFM { + enum PRFMValues { + Invalid = -1, + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 + }; + + struct PRFMMapper : NamedImmMapper { + const static Mapping PRFMPairs[]; + + PRFMMapper(); + }; +} + +namespace A64PState { + enum PStateValues { + Invalid = -1, + SPSel = 0x05, + DAIFSet = 0x1e, + DAIFClr = 0x1f + }; + + struct PStateMapper : NamedImmMapper { + const static Mapping PStatePairs[]; + + PStateMapper(); + }; + +} + +namespace A64SE { + enum ShiftExtSpecifiers { + Invalid = -1, + LSL, + LSR, + ASR, + ROR, + + UXTB, + UXTH, + UXTW, + UXTX, + + SXTB, + SXTH, + SXTW, + SXTX + }; +} + +namespace A64SysReg { + enum SysRegROValues { + MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 + DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 + MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 + OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 + DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 + PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 + PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 + MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 + CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 + CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 + CTR_EL0 = 0xd801, // 11 011 
0000 0000 001 + MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 + REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 + AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 + DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 + ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 + ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 + ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 + ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 + ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 + ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 + ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 + ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 + ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 + ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 + ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 + ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 + ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 + ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 + ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 + MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 + MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 + RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 + RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 + RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 + ISR_EL1 = 0xc608, // 11 000 1100 0001 000 + CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 + CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 + }; + + enum SysRegWOValues { + DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 + OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 + PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 + }; + + enum SysRegValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 + OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 + TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 + MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 + MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 + DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 + OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 + DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 + DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 + DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 + DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 + DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 + DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 + DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 + DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 + DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 + DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 + DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 + DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 + DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100 + DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 + DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 + DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 + DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 + DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 + DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 + DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 + DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 + DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 + DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 + DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 + 
DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 + DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 + DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 + DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 + DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 + DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 + DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 + DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 + DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 + DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 + DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 + DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 + DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 + DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 + DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 + DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 + DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 + DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 + DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 + DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 + DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 + DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 + DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 + DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 + DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 + DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 + DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 + DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 + DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 + DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 + DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 + DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 + DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 + DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 + DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 + DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 + DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 + DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 + DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 + DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 + DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 + TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 + OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 + DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 + DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 + DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 + CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 + VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 + VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 + CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 + SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 + SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 + SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 + ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 + ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 + ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 + HCR_EL2 = 0xe088, // 11 100 0001 0001 000 + SCR_EL3 = 0xf088, // 11 110 0001 0001 000 + MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 + SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 + CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 + CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 + HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011 + HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 + MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 + TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 + TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 + TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 + TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 + TCR_EL1 = 0xc102, // 11 000 0010 0000 010 + TCR_EL2 = 0xe102, // 11 100 0010 0000 010 + TCR_EL3 = 0xf102, // 11 110 0010 0000 010 + VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 + VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 + DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 + 
SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 + SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 + SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 + ELR_EL1 = 0xc201, // 11 000 0100 0000 001 + ELR_EL2 = 0xe201, // 11 100 0100 0000 001 + ELR_EL3 = 0xf201, // 11 110 0100 0000 001 + SP_EL0 = 0xc208, // 11 000 0100 0001 000 + SP_EL1 = 0xe208, // 11 100 0100 0001 000 + SP_EL2 = 0xf208, // 11 110 0100 0001 000 + SPSel = 0xc210, // 11 000 0100 0010 000 + NZCV = 0xda10, // 11 011 0100 0010 000 + DAIF = 0xda11, // 11 011 0100 0010 001 + CurrentEL = 0xc212, // 11 000 0100 0010 010 + SPSR_irq = 0xe218, // 11 100 0100 0011 000 + SPSR_abt = 0xe219, // 11 100 0100 0011 001 + SPSR_und = 0xe21a, // 11 100 0100 0011 010 + SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 + FPCR = 0xda20, // 11 011 0100 0100 000 + FPSR = 0xda21, // 11 011 0100 0100 001 + DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 + DLR_EL0 = 0xda29, // 11 011 0100 0101 001 + IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 + AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 + AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 + AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 + AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 + AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 + AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 + ESR_EL1 = 0xc290, // 11 000 0101 0010 000 + ESR_EL2 = 0xe290, // 11 100 0101 0010 000 + ESR_EL3 = 0xf290, // 11 110 0101 0010 000 + FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 + FAR_EL1 = 0xc300, // 11 000 0110 0000 000 + FAR_EL2 = 0xe300, // 11 100 0110 0000 000 + FAR_EL3 = 0xf300, // 11 110 0110 0000 000 + HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 + PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 + PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 + PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 + PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 + PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 + PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 + PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 + PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 + PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 + PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 + PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 + PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 + PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 + MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 + MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 + MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 + AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 + AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 + AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 + VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 + VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 + VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 + RMR_EL1 = 0xc602, // 11 000 1100 0000 010 + RMR_EL2 = 0xe602, // 11 100 1100 0000 010 + RMR_EL3 = 0xf602, // 11 110 1100 0000 010 + CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 + TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 + TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 + TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 + TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011 + TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 + CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 + CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 + CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 + CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 + CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 + CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 + CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 + CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 + CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 + CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 + 
CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 + CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 + CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 + CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 + CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 + CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 + PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 + PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 + PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 + PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 + PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 + PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 + PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 + PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 + PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 + PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 + PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 + PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 + PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 + PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 + PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 + PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 + PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 + PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 + PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 + PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 + PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 + PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 + PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 + PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 + PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 + PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 + PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 + PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 + PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 + PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 + PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 + PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 + PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 + PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 + PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 + PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 + PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 + PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 + PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 + PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 + PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 + PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 + PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 + PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 + PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 + PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 + PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 + PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 + PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 + PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 + PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 + PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 + PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100 + PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 + PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 + PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 + PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 + PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 + PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 + PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 + PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 + PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 + PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 + }; + + // Note that these do 
not inherit from NamedImmMapper. This class is + // sufficiently different in its behaviour that I don't believe it's worth + // burdening the common NamedImmMapper with abstractions only needed in + // this one case. + struct SysRegMapper { + static const NamedImmMapper::Mapping SysRegPairs[]; + + const NamedImmMapper::Mapping *InstPairs; + size_t NumInstPairs; + + SysRegMapper() {} + uint32_t fromString(StringRef Name, bool &Valid) const; + std::string toString(uint32_t Bits, bool &Valid) const; + }; + + struct MSRMapper : SysRegMapper { + static const NamedImmMapper::Mapping MSRPairs[]; + MSRMapper(); + }; + + struct MRSMapper : SysRegMapper { + static const NamedImmMapper::Mapping MRSPairs[]; + MRSMapper(); + }; + + uint32_t ParseGenericRegister(StringRef Name, bool &Valid); +} + +namespace A64TLBI { + enum TLBIValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 + IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 + VMALLE1IS = 0x4418, // 01 000 1000 0011 000 + ALLE2IS = 0x6418, // 01 100 1000 0011 000 + ALLE3IS = 0x7418, // 01 110 1000 0011 000 + VAE1IS = 0x4419, // 01 000 1000 0011 001 + VAE2IS = 0x6419, // 01 100 1000 0011 001 + VAE3IS = 0x7419, // 01 110 1000 0011 001 + ASIDE1IS = 0x441a, // 01 000 1000 0011 010 + VAAE1IS = 0x441b, // 01 000 1000 0011 011 + ALLE1IS = 0x641c, // 01 100 1000 0011 100 + VALE1IS = 0x441d, // 01 000 1000 0011 101 + VALE2IS = 0x641d, // 01 100 1000 0011 101 + VALE3IS = 0x741d, // 01 110 1000 0011 101 + VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 + VAALE1IS = 0x441f, // 01 000 1000 0011 111 + IPAS2E1 = 0x6421, // 01 100 1000 0100 001 + IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 + VMALLE1 = 0x4438, // 01 000 1000 0111 000 + ALLE2 = 0x6438, // 01 100 1000 0111 000 + ALLE3 = 0x7438, // 01 110 1000 0111 000 + VAE1 = 0x4439, // 01 000 1000 0111 001 + VAE2 = 0x6439, // 01 100 1000 0111 001 + VAE3 = 0x7439, // 01 110 1000 0111 001 + ASIDE1 = 0x443a, // 01 000 1000 0111 010 + VAAE1 = 0x443b, // 01 000 1000 0111 011 + ALLE1 = 0x643c, // 01 100 1000 0111 100 + VALE1 = 0x443d, // 01 000 1000 0111 101 + VALE2 = 0x643d, // 01 100 1000 0111 101 + VALE3 = 0x743d, // 01 110 1000 0111 101 + VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 + VAALE1 = 0x443f // 01 000 1000 0111 111 + }; + + struct TLBIMapper : NamedImmMapper { + const static Mapping TLBIPairs[]; + + TLBIMapper(); + }; + + static inline bool NeedsRegister(TLBIValues Val) { + switch (Val) { + case VMALLE1IS: + case ALLE2IS: + case ALLE3IS: + case ALLE1IS: + case VMALLS12E1IS: + case VMALLE1: + case ALLE2: + case ALLE3: + case ALLE1: + case VMALLS12E1: + return false; + default: + return true; + } + } +} + +namespace AArch64II { + + enum TOF { + //===--------------------------------------------------------------===// + // AArch64 Specific MachineOperand flags. + + MO_NO_FLAG, + + // MO_GOT - Represents a relocation referring to the GOT entry of a given + // symbol. Used in adrp. + MO_GOT, + + // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the + // GOT entry of a given symbol. Used in ldr only. + MO_GOT_LO12, + + // MO_DTPREL_* - Represents a relocation referring to the offset from a + // module's dynamic thread pointer. Used in the local-dynamic TLS access + // model. + MO_DTPREL_G1, + MO_DTPREL_G0_NC, + + // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry + // providing the offset of a variable from the thread-pointer. 
Used in + // initial-exec TLS model where this offset is assigned in the static thread + // block and thus known by the dynamic linker. + MO_GOTTPREL, + MO_GOTTPREL_LO12, + + // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing + // a TLS descriptor chosen by the dynamic linker. Used for the + // general-dynamic and local-dynamic TLS access models where very little is + // known at link-time. + MO_TLSDESC, + MO_TLSDESC_LO12, + + // MO_TPREL_* - Represents a relocation referring to the offset of a + // variable from the thread pointer itself. Used in the local-exec TLS + // access model. + MO_TPREL_G1, + MO_TPREL_G0_NC, + + // MO_LO12 - On a symbol operand, this represents a relocation containing + // lower 12 bits of the address. Used in add/sub/ldr/str. + MO_LO12 + }; +} + +class APFloat; + +namespace A64Imms { + bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); + + inline bool isFPImm(const APFloat &Val) { + uint32_t Imm8; + return isFPImm(Val, Imm8); + } + + bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); + bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); + + bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + + // We sometimes want to know whether the immediate is representable with a + // MOVN but *not* with a MOVZ (because that would take priority). + bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + +} + +} // end namespace llvm; + +#endif diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt new file mode 100644 index 0000000..2c28348 --- /dev/null +++ b/lib/Target/AArch64/Utils/CMakeLists.txt @@ -0,0 +1,5 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64Utils + AArch64BaseInfo.cpp + ) diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt new file mode 100644 index 0000000..1be5375 --- /dev/null +++ b/lib/Target/AArch64/Utils/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Utils +parent = AArch64 +required_libraries = Core Support +add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile new file mode 100644 index 0000000..0f4a645 --- /dev/null +++ b/lib/Target/AArch64/Utils/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../..
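The A64Imms declarations above encode a simple rule worth spelling out: a value fits MOVZ when all of its set bits live in a single aligned 16-bit chunk, and fits MOVN when the same is true of its bitwise inverse, with MOVZ winning ties. A minimal standalone sketch of those checks follows; it illustrates the rule only, not the actual AArch64BaseInfo.cpp implementation, and the Shift convention used here (raw bit amount rather than an encoded 2-bit field) is an assumption.

#include <cstdint>

// Sketch only: chunk-scan test for MOVZ/MOVN representability.
static bool isMOVZImmSketch(int RegWidth, uint64_t Value, int &UImm16,
                            int &Shift) {
  if (RegWidth == 32)
    Value &= 0xffffffffULL;
  // Representable iff every set bit falls inside one aligned 16-bit chunk.
  for (int S = 0; S < RegWidth; S += 16) {
    if ((Value & ~(0xffffULL << S)) == 0) {
      UImm16 = int((Value >> S) & 0xffff);
      Shift = S; // assumed convention: shift amount in bits
      return true;
    }
  }
  return false;
}

static bool isMOVNImmSketch(int RegWidth, uint64_t Value, int &UImm16,
                            int &Shift) {
  // MOVN materialises the NOT of its shifted immediate, so a value works
  // exactly when its inverse works for MOVZ.
  uint64_t Inv = ~Value;
  if (RegWidth == 32)
    Inv &= 0xffffffffULL;
  return isMOVZImmSketch(RegWidth, Inv, UImm16, Shift);
}

static bool isOnlyMOVNImmSketch(int RegWidth, uint64_t Value, int &UImm16,
                                int &Shift) {
  int DeadImm, DeadShift; // MOVZ takes priority, per the comment above.
  return !isMOVZImmSketch(RegWidth, Value, DeadImm, DeadShift) &&
         isMOVNImmSketch(RegWidth, Value, UImm16, Shift);
}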
+LIBRARYNAME = LLVMAArch64Utils + +# Hack: we need to include 'main' AArch64 target directory to grab private headers +#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common -- cgit v1.1 From dfe076af9879eb68a7b8331f9c02eecf563d85be Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 5 Feb 2013 13:24:56 +0000 Subject: Fix formatting in AArch64 backend. This should fix three purely whitespace issues: + 80 column violations. + Tab characters. + TableGen brace placement. No functional changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174370 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64.td | 2 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 6 +- lib/Target/AArch64/AArch64ConstantIslandPass.cpp | 9 +- lib/Target/AArch64/AArch64FrameLowering.cpp | 12 +- lib/Target/AArch64/AArch64FrameLowering.h | 2 +- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 9 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 58 +- lib/Target/AArch64/AArch64ISelLowering.h | 4 +- lib/Target/AArch64/AArch64InstrFormats.td | 159 ++-- lib/Target/AArch64/AArch64InstrInfo.cpp | 6 +- lib/Target/AArch64/AArch64InstrInfo.td | 943 ++++++++------------- lib/Target/AArch64/AArch64MachineFunctionInfo.h | 4 +- lib/Target/AArch64/AArch64RegisterInfo.cpp | 8 +- lib/Target/AArch64/AArch64RegisterInfo.td | 6 +- lib/Target/AArch64/AArch64SelectionDAGInfo.h | 2 +- lib/Target/AArch64/AArch64TargetObjectFile.cpp | 2 +- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 12 +- .../AArch64/Disassembler/AArch64Disassembler.cpp | 28 +- .../AArch64/InstPrinter/AArch64InstPrinter.h | 2 +- .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 134 +-- .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 2 +- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 3 +- .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 2 +- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 3 +- 24 files changed, 577 insertions(+), 841 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 750fec7..0e4f5fb 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -1,4 +1,4 @@ -//===- AArch64.td - Describe the AArch64 Target Machine ---------*- tblgen -*-==// +//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 63cc88f..61839b6 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -34,7 +34,8 @@ AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { // expected to be created. 
assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); - return MachineLocation(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); } /// Try to print a floating-point register as if it belonged to a specified @@ -90,7 +91,8 @@ bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, StringRef Name; StringRef Modifier; switch (MO.getType()) { - default: llvm_unreachable("Unexpected operand for symbolic address constraint"); + default: + llvm_unreachable("Unexpected operand for symbolic address constraint"); case MachineOperand::MO_GlobalAddress: Name = Mang->getSymbol(MO.getGlobal())->getName(); diff --git a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp index f5e5c64..ab482bd 100644 --- a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp +++ b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp @@ -46,7 +46,8 @@ STATISTIC(NumCBrFixed, "Number of cond branches fixed"); // FIXME: This option should be removed once it has received sufficient testing. static cl::opt AlignConstantIslands("aarch64-align-constant-islands", cl::Hidden, - cl::init(true), cl::desc("Align constant islands in code")); + cl::init(true), + cl::desc("Align constant islands in code")); /// Return the worst case padding that could result from unknown offset bits. /// This does not include alignment padding caused by known offset bits. @@ -828,7 +829,8 @@ bool AArch64ConstantIslands::isWaterInRange(unsigned UserOffset, bool AArch64ConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, - unsigned OffsetBits, bool DoDump) { + unsigned OffsetBits, + bool DoDump) { unsigned CPEOffset = getOffsetOf(CPEMI); if (DoDump) { @@ -930,7 +932,8 @@ int AArch64ConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getOffsetBits())) { + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, + U.getOffsetBits())) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 2301114..24d1576 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -180,7 +180,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { - MachineLocation Dst(MachineLocation::VirtualFP, MFI->getObjectOffset(I->getFrameIdx())); + MachineLocation Dst(MachineLocation::VirtualFP, + MFI->getObjectOffset(I->getFrameIdx())); MachineLocation Src(I->getReg()); Moves.push_back(MachineMove(CSLabel, Dst, Src)); } @@ -537,7 +538,8 @@ AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, State = RegState::Define; } - NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].SingleOpcode)) + NewMI = BuildMI(MBB, MBBI, DL, + TII.get(PossClasses[ClassIdx].SingleOpcode)) .addReg(CSI[i].getReg(), State); } @@ -549,9 +551,9 @@ AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, Flags = isPrologue ? 
MachineMemOperand::MOStore : MachineMemOperand::MOLoad; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - Flags, - Pair ? TheClass.getSize() * 2 : TheClass.getSize(), - MFI.getObjectAlignment(FrameIdx)); + Flags, + Pair ? TheClass.getSize() * 2 : TheClass.getSize(), + MFI.getObjectAlignment(FrameIdx)); NewMI.addFrameIndex(FrameIdx) .addImm(0) // address-register offset diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index dfa66ec..a14c2bb 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -29,7 +29,7 @@ private: struct LoadStoreMethod { const TargetRegisterClass *RegClass; // E.g. GPR64RegClass - // The preferred instruction. + // The preferred instruction. unsigned PairOpcode; // E.g. LSPair64_STR // Sometimes only a single register can be handled at once. diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 28f152c..c933555 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -72,7 +72,8 @@ public: bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); - bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth); bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -130,8 +131,8 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) { + char ConstraintCode, + std::vector &OutOps) { switch (ConstraintCode) { default: llvm_unreachable("Unrecognised AArch64 memory constraint"); case 'm': @@ -152,7 +153,7 @@ AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { ConstantFPSDNode *Imm = dyn_cast(N); if (!Imm || !Imm->getValueAPF().isPosZero()) return false; - + // Doesn't actually carry any information, but keeps TableGen quiet. Dummy = CurDAG->getTargetConstant(0, MVT::i32); return true; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 9b26b1f..2158b05 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -841,7 +841,8 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, DebugLoc DL, SDValue &Chain) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - AArch64MachineFunctionInfo *FuncInfo = MF.getInfo(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo(); SmallVector MemOps; @@ -1045,10 +1046,11 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, SDValue Flag; for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - // PCS: "If the type, T, of the result of a function is such that void func(T - // arg) would require that arg be passed as a value in a register (or set of - // registers) according to the rules in 5.4, then the result is returned in - // the same registers as would be used for such an argument. + // PCS: "If the type, T, of the result of a function is such that + // void func(T arg) would require that arg be passed as a value in a + // register (or set of registers) according to the rules in 5.4, then the + // result is returned in the same registers as would be used for such an + // argument. 
// // Otherwise, the caller shall reserve a block of memory of sufficient // size and alignment to hold the result. The address of the memory block @@ -1166,7 +1168,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, if (!IsSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, getPointerTy()); + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, + getPointerTy()); SmallVector MemOpChains; SmallVector, 8> RegsToPass; @@ -1874,9 +1877,10 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, if (Alignment == 0) { const PointerType *GVPtrTy = cast(GV->getType()); - if (GVPtrTy->getElementType()->isSized()) - Alignment = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); - else { + if (GVPtrTy->getElementType()->isSized()) { + Alignment + = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); + } else { // Be conservative if we can't guess, not that it really matters: // functions and labels aren't valid for loads, and the methods used to // actually calculate an address work with any alignment. @@ -1954,7 +1958,8 @@ SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, Ops.push_back(Glue); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], + Ops.size()); Glue = Chain.getValue(1); // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it @@ -1995,7 +2000,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, DAG.getTargetConstant(0, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar, + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, + TPOff, LoVar, DAG.getTargetConstant(0, MVT::i32)), 0); } else if (Model == TLSModel::GeneralDynamic) { // Accesses used in this sequence go via the TLS descriptor which lives in @@ -2005,7 +2011,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLSDESC_LO12); SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, DAG.getConstant(8, MVT::i32)); + HiDesc, LoDesc, + DAG.getConstant(8, MVT::i32)); SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); @@ -2027,7 +2034,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLSDESC_LO12); SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, DAG.getConstant(8, MVT::i32)); + HiDesc, LoDesc, + DAG.getConstant(8, MVT::i32)); SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); @@ -2040,7 +2048,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, DAG.getTargetConstant(0, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar, + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, + TPOff, LoVar, DAG.getTargetConstant(0, MVT::i32)), 0); } else llvm_unreachable("Unsupported TLS access model"); @@ -2123,7 +2132,8 @@ 
AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, + Op.getValueType(), SetCC, IfTrue, IfFalse, A64cc); if (Alternative != A64CC::Invalid) { @@ -2231,7 +2241,8 @@ AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call // Standard, section B.3. MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo = MF.getInfo(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo(); DebugLoc DL = Op.getDebugLoc(); SDValue Chain = Op.getOperand(0); @@ -2365,7 +2376,7 @@ static SDValue PerformANDCombine(SDNode *N, } static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI) { // An atomic operation followed by an acquiring atomic fence can be reduced to // an acquiring load. The atomic operation provides a convenient pointer to // load from. If the original operation was a load anyway we can actually @@ -2407,7 +2418,7 @@ static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, } static SDValue PerformATOMIC_STORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI) { // A releasing atomic fence followed by an atomic store can be combined into a // single store operation. SelectionDAG &DAG = DCI.DAG; @@ -2821,7 +2832,8 @@ AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { } // FIXME: Ump, Utf, Usa, Ush - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be + // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, + // whatever they may be // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be // Usa: An absolute symbolic address // Ush: The high part (bits 32:12) of a pc-relative symbolic address @@ -2893,7 +2905,8 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (const GlobalAddressSDNode *GA = dyn_cast(Op)) { Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), GA->getValueType(0)); - } else if (const BlockAddressSDNode *BA = dyn_cast(Op)) { + } else if (const BlockAddressSDNode *BA + = dyn_cast(Op)) { Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0)); } else if (const ExternalSymbolSDNode *ES @@ -2924,8 +2937,9 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } std::pair -AArch64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { +AArch64TargetLowering::getRegForInlineAsmConstraint( + const std::string &Constraint, + EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index ec4e432..4960d28 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -161,8 +161,8 @@ public: SelectionDAG& DAG) const; /// Finds the incoming stack arguments which overlap the given fixed stack - /// object and incorporates their load into the current chain. This prevents an - /// upcoming store from clobbering the stack argument before it's used. 
+ /// object and incorporates their load into the current chain. This prevents + /// an upcoming store from clobbering the stack argument before it's used. SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo *MFI, int ClobberedFI) const; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index ce66396..c6aa265 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -16,8 +16,7 @@ // architecture. class A64Inst patterns, InstrItinClass itin> - : Instruction -{ + : Instruction { // All A64 instructions are 32-bit. This field will be filled in // gradually going down the hierarchy. field bits<32> Inst; @@ -40,8 +39,7 @@ class A64Inst patterns, let Itinerary = itin; } -class PseudoInst patterns> : Instruction -{ +class PseudoInst patterns> : Instruction { let Namespace = "AArch64"; let OutOperandList = outs; @@ -54,8 +52,7 @@ class PseudoInst patterns> : Instruction // Represents a pseudo-instruction that represents a single A64 instruction for // whatever reason, the eventual result will be a 32-bit real instruction. class A64PseudoInst patterns> - : PseudoInst -{ + : PseudoInst { let Size = 4; } @@ -70,8 +67,7 @@ class A64PseudoExpand patterns, dag Result> class A64InstRd patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<5> Rd; let Inst{4-0} = Rd; @@ -79,8 +75,7 @@ class A64InstRd patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<5> Rt; let Inst{4-0} = Rt; @@ -89,8 +84,7 @@ class A64InstRt patterns, InstrItinClass itin> - : A64InstRd -{ + : A64InstRd { // Inherit rdt bits<5> Rn; @@ -99,8 +93,7 @@ class A64InstRdn patterns, InstrItinClass itin> - : A64InstRt -{ + : A64InstRt { // Inherit rdt bits<5> Rn; @@ -110,8 +103,7 @@ class A64InstRtn patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<5> Rt2; let Inst{14-10} = Rt2; @@ -119,8 +111,7 @@ class A64InstRtt2n patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bits<5> Rm; let Inst{20-16} = Rm; @@ -135,8 +126,7 @@ class A64InstRdnm opt, bits<3> option, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<3> Imm3; let Inst{31} = sf; @@ -156,8 +146,7 @@ class A64I_addsubext opt, bits<3> option, class A64I_addsubimm shift, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bits<12> Imm12; let Inst{31} = sf; @@ -172,8 +161,7 @@ class A64I_addsubimm shift, class A64I_addsubshift shift, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<6> Imm6; let Inst{31} = sf; @@ -192,8 +180,7 @@ class A64I_addsubshift shift, class A64I_addsubcarry opcode2, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { let Inst{31} = sf; let Inst{30} = op; let Inst{29} = S; @@ -209,8 +196,7 @@ class A64I_addsubcarry opcode2, class A64I_bitfield opc, bit n, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bits<6> ImmR; bits<6> ImmS; @@ -228,8 +214,7 @@ class A64I_bitfield opc, bit n, class A64I_cmpbr patterns, InstrItinClass itin> - : A64InstRt -{ + : A64InstRt { bits<19> Label; let Inst{31} = sf; @@ -243,8 +228,7 @@ class A64I_cmpbr patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<19> Label; bits<4> Cond; @@ -259,8 +243,7 @@ class A64I_condbr patterns, InstrItinClass itin> - : A64Inst -{ + :
A64Inst { bits<5> Rn; bits<5> UImm5; bits<4> NZCVImm; @@ -283,8 +266,7 @@ class A64I_condcmpimm patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<5> Rn; bits<5> Rm; bits<4> NZCVImm; @@ -308,8 +290,7 @@ class A64I_condcmpreg op2, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<4> Cond; let Inst{31} = sf; @@ -327,8 +308,7 @@ class A64I_condsel op2, class A64I_dp_1src opcode2, bits<6> opcode, string asmstr, dag outs, dag ins, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { let Inst{31} = sf; let Inst{30} = 0b1; let Inst{29} = S; @@ -341,8 +321,7 @@ class A64I_dp_1src opcode2, bits<6> opcode, class A64I_dp_2src opcode, bit S, string asmstr, dag outs, dag ins, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { let Inst{31} = sf; let Inst{30} = 0b0; let Inst{29} = S; @@ -355,8 +334,7 @@ class A64I_dp_2src opcode, bit S, class A64I_dp3 opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<5> Ra; let Inst{31} = sf; @@ -374,8 +352,7 @@ class A64I_dp3 opcode, class A64I_exception opc, bits<3> op2, bits<2> ll, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<16> UImm16; let Inst{31-24} = 0b11010100; @@ -389,8 +366,7 @@ class A64I_exception opc, bits<3> op2, bits<2> ll, class A64I_extract op, bit n, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<6> LSB; let Inst{31} = sf; @@ -408,8 +384,7 @@ class A64I_extract op, bit n, class A64I_fpcmp type, bits<2> op, bits<5> opcode2, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<5> Rn; bits<5> Rm; @@ -430,8 +405,7 @@ class A64I_fpcmp type, bits<2> op, bits<5> opcode2, class A64I_fpccmp type, bit op, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bits<5> Rn; bits<5> Rm; bits<4> NZCVImm; @@ -455,8 +429,7 @@ class A64I_fpccmp type, bit op, class A64I_fpcondsel type, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<4> Cond; let Inst{31} = m; @@ -477,8 +450,7 @@ class A64I_fpcondsel type, class A64I_fpdp1 type, bits<6> opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { let Inst{31} = m; let Inst{30} = 0b0; let Inst{29} = s; @@ -495,8 +467,7 @@ class A64I_fpdp1 type, bits<6> opcode, class A64I_fpdp2 type, bits<4> opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { let Inst{31} = m; let Inst{30} = 0b0; let Inst{29} = s; @@ -514,8 +485,7 @@ class A64I_fpdp2 type, bits<4> opcode, class A64I_fpdp3 type, bit o1, bit o0, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<5> Ra; let Inst{31} = m; @@ -535,8 +505,7 @@ class A64I_fpdp3 type, bit o1, bit o0, class A64I_fpfixed type, bits<2> mode, bits<3> opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bits<6> Scale; let Inst{31} = sf; @@ -556,8 +525,7 @@ class A64I_fpfixed type, bits<2> mode, bits<3> opcode, class A64I_fpint type, bits<2> rmode, bits<3> opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { let Inst{31} = sf; let Inst{30} = 0b0; 
let Inst{29} = s; @@ -576,8 +544,7 @@ class A64I_fpint type, bits<2> rmode, bits<3> opcode, class A64I_fpimm type, bits<5> imm5, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRd -{ + : A64InstRd { bits<8> Imm8; let Inst{31} = m; @@ -596,8 +563,7 @@ class A64I_fpimm type, bits<5> imm5, class A64I_LDRlit opc, bit v, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRt -{ + : A64InstRt { bits<19> Imm19; let Inst{31-30} = opc; @@ -612,8 +578,7 @@ class A64I_LDRlit opc, bit v, class A64I_LDSTex_tn size, bit o2, bit L, bit o1, bit o0, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { let Inst{31-30} = size; let Inst{29-24} = 0b001000; let Inst{23} = o2; @@ -650,8 +615,7 @@ class A64I_LDSTex_stt2n size, bit o2, bit L, bit o1, bit o0, class A64I_LSpostind size, bit v, bits<2> opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<9> SImm9; let Inst{31-30} = size; @@ -670,8 +634,7 @@ class A64I_LSpostind size, bit v, bits<2> opc, class A64I_LSpreind size, bit v, bits<2> opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<9> SImm9; @@ -691,8 +654,7 @@ class A64I_LSpreind size, bit v, bits<2> opc, class A64I_LSunpriv size, bit v, bits<2> opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<9> SImm9; @@ -712,8 +674,7 @@ class A64I_LSunpriv size, bit v, bits<2> opc, class A64I_LSunalimm size, bit v, bits<2> opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<9> SImm9; let Inst{31-30} = size; @@ -733,8 +694,7 @@ class A64I_LSunalimm size, bit v, bits<2> opc, class A64I_LSunsigimm size, bit v, bits<2> opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<12> UImm12; let Inst{31-30} = size; @@ -749,8 +709,7 @@ class A64I_LSunsigimm size, bit v, bits<2> opc, class A64I_LSregoff size, bit v, bits<2> opc, bit optionlo, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { bits<5> Rm; // Complex operand selection needed for these instructions, so they @@ -780,8 +739,7 @@ class A64I_LSregoff size, bit v, bits<2> opc, bit optionlo, class A64I_LSPoffset opc, bit v, bit l, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtt2n -{ + : A64InstRtt2n { bits<7> SImm7; let Inst{31-30} = opc; @@ -799,8 +757,7 @@ class A64I_LSPoffset opc, bit v, bit l, class A64I_LSPpostind opc, bit v, bit l, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtt2n -{ + : A64InstRtt2n { bits<7> SImm7; let Inst{31-30} = opc; @@ -818,8 +775,7 @@ class A64I_LSPpostind opc, bit v, bit l, class A64I_LSPpreind opc, bit v, bit l, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtt2n -{ + : A64InstRtt2n { bits<7> SImm7; let Inst{31-30} = opc; @@ -837,8 +793,7 @@ class A64I_LSPpreind opc, bit v, bit l, class A64I_LSPnontemp opc, bit v, bit l, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtt2n -{ + : A64InstRtt2n { bits<7> SImm7; let Inst{31-30} = opc; @@ -856,8 +811,7 @@ class A64I_LSPnontemp opc, bit v, bit l, class A64I_logicalimm opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bit 
N; bits<6> ImmR; bits<6> ImmS; @@ -883,8 +837,7 @@ class A64I_logicalimm opc, class A64I_logicalshift opc, bits<2> shift, bit N, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdnm -{ + : A64InstRdnm { bits<6> Imm6; let Inst{31} = sf; @@ -902,8 +855,7 @@ class A64I_logicalshift opc, bits<2> shift, bit N, class A64I_movw opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRd -{ + : A64InstRd { bits<16> UImm16; bits<2> Shift; // Called "hw" officially @@ -919,8 +871,7 @@ class A64I_movw opc, class A64I_PCADR patterns, InstrItinClass itin> - : A64InstRd -{ + : A64InstRd { bits<21> Label; let Inst{31} = op; @@ -933,8 +884,7 @@ class A64I_PCADR patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { bits<2> Op0; bits<3> Op1; bits<4> CRn; @@ -959,8 +909,7 @@ class A64I_system patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { // Doubly special in not even sharing register fields with other // instructions, so we create our own Rn here. bits<26> Label; @@ -974,8 +923,7 @@ class A64I_Bimm patterns, InstrItinClass itin> - : A64InstRt -{ + : A64InstRt { // Doubly special in not even sharing register fields with other // instructions, so we create our own Rn here. bits<6> Imm; @@ -995,8 +943,7 @@ class A64I_TBimm opc, bits<5> op2, bits<6> op3, bits<5> op4, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64Inst -{ + : A64Inst { // Doubly special in not even sharing register fields with other // instructions, so we create our own Rn here. bits<5> Rn; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index d59f2f1..94b3429 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -613,7 +613,8 @@ bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, llvm_unreachable("Unimplemented rewriteFrameIndex"); } -void llvm::emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, +void llvm::emitRegUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc dl, const TargetInstrInfo &TII, unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, int64_t NumBytes, MachineInstr::MIFlag MIFlags) { @@ -695,7 +696,8 @@ namespace { LDTLSCleanup() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF) { - AArch64MachineFunctionInfo* MFI = MF.getInfo(); + AArch64MachineFunctionInfo* MFI + = MF.getInfo(); if (MFI->getNumLocalDynamicTLSAccesses() < 2) { // No point folding accesses if there isn't at least two. return false; diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 3c15200..673e051 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -113,7 +113,8 @@ def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; @@ -132,8 +133,7 @@ def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, // arguments passed on the stack. Here we select those markers to // pseudo-instructions which explicitly set the stack, and finally in the // RegisterInfo we convert them to a true stack adjustment. 
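A sketch of the final conversion step the comment above describes, before the pseudo-instruction definitions that follow. This is only the usual shape of a call-frame-pseudo elimination hook; emitSPAdjustment and the exact signature are hypothetical stand-ins for illustration, not the AArch64 backend's real entry points.

// Hypothetical sketch: turn an ADJCALLSTACKDOWN/ADJCALLSTACKUP marker into a
// true SP adjustment once the frame layout is known.
void eliminateCallFramePseudo(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI) {
  int64_t Amount = MI->getOperand(0).getImm(); // bytes reserved for the call
  if (MI->getOpcode() == AArch64::ADJCALLSTACKDOWN)
    Amount = -Amount;                          // the stack grows downwards
  if (Amount != 0)
    emitSPAdjustment(MBB, MI, Amount);         // assumed helper: expands to an
                                               // immediate add/sub on XSP
  MBB.erase(MI);                               // the marker itself disappears
}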
-let Defs = [XSP], Uses = [XSP] in -{ +let Defs = [XSP], Uses = [XSP] in { def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), [(AArch64callseq_start timm:$amt)]>; @@ -146,16 +146,15 @@ let Defs = [XSP], Uses = [XSP] in //===----------------------------------------------------------------------===// let usesCustomInserter = 1, Defs = [NZCV] in { -multiclass AtomicSizes -{ +multiclass AtomicSizes { def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), [(set GPR32:$dst, (!cast(opname # "_8") GPR64:$ptr, GPR32:$incr))]>; def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set GPR32:$dst, (!cast(opname # "_16") GPR64:$ptr, GPR32:$incr))]>; + [(set GPR32:$dst, (!cast(opname # "_16") GPR64:$ptr, GPR32:$incr))]>; def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set GPR32:$dst, (!cast(opname # "_32") GPR64:$ptr, GPR32:$incr))]>; + [(set GPR32:$dst, (!cast(opname # "_32") GPR64:$ptr, GPR32:$incr))]>; def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), - [(set GPR64:$dst, (!cast(opname # "_64") GPR64:$ptr, GPR64:$incr))]>; + [(set GPR64:$dst, (!cast(opname # "_64") GPR64:$ptr, GPR64:$incr))]>; } } @@ -205,17 +204,15 @@ def ATOMIC_CMP_SWAP_I64 // is not optional in that case (but can explicitly be 0), and the // entire suffix can be skipped (e.g. "add sp, x3, x2"). -multiclass extend_operands -{ - def _asmoperand : AsmOperandClass - { +multiclass extend_operands { + def _asmoperand : AsmOperandClass { let Name = PREFIX; let RenderMethod = "addRegExtendOperands"; let PredicateMethod = "isRegExtend"; } - def _operand : Operand, ImmLeaf= 0 && Imm <= 4; }]> - { + def _operand : Operand, + ImmLeaf= 0 && Imm <= 4; }]> { let PrintMethod = "printRegExtendOperand"; let DecoderMethod = "DecodeRegExtendOperand"; let ParserMatchClass = !cast(PREFIX # "_asmoperand"); @@ -231,14 +228,12 @@ defm SXTH : extend_operands<"SXTH">; defm SXTW : extend_operands<"SXTW">; defm SXTX : extend_operands<"SXTX">; -def LSL_extasmoperand : AsmOperandClass -{ +def LSL_extasmoperand : AsmOperandClass { let Name = "RegExtendLSL"; let RenderMethod = "addRegExtendOperands"; } -def LSL_extoperand : Operand -{ +def LSL_extoperand : Operand { let ParserMatchClass = LSL_extasmoperand; } @@ -247,14 +242,12 @@ def LSL_extoperand : Operand // non-uniform because everything has already been promoted to the // legal i64 and i32 types. We'll wrap the various variants up in a // class for use later. -class extend_types -{ +class extend_types { dag uxtb; dag uxth; dag uxtw; dag uxtx; dag sxtb; dag sxth; dag sxtw; dag sxtx; } -def extends_to_i64 : extend_types -{ +def extends_to_i64 : extend_types { let uxtb = (and (anyext GPR32:$Rm), 255); let uxth = (and (anyext GPR32:$Rm), 65535); let uxtw = (zext GPR32:$Rm); @@ -267,8 +260,7 @@ def extends_to_i64 : extend_types } -def extends_to_i32 : extend_types -{ +def extends_to_i32 : extend_types { let uxtb = (and GPR32:$Rm, 255); let uxth = (and GPR32:$Rm, 65535); let uxtw = (i32 GPR32:$Rm); @@ -290,9 +282,9 @@ def extends_to_i32 : extend_types // + Patterns are very different as well. // + Passing different registers would be ugly (more fields in extend_types // would probably be the best option). 
-multiclass addsub_exts -{ +multiclass addsub_exts { def w_uxtb : A64I_addsubext -{ +multiclass addsub_xxtx { def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, outs, (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), @@ -351,8 +343,7 @@ multiclass addsub_xxtx; } -multiclass addsub_wxtx -{ +multiclass addsub_wxtx { def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), @@ -429,8 +420,7 @@ defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV, // created for uxtx/sxtx since they're non-uniform and it's expected that // add/sub (shifted register) will handle those cases anyway. multiclass addsubext_noshift_patterns -{ + RegisterClass GPRsp, extend_types exts> { def : Pat<(nodeop GPRsp:$Rn, exts.uxtb), (!cast(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; def : Pat<(nodeop GPRsp:$Rn, exts.uxth), @@ -461,8 +451,7 @@ defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>; // operation. Also permitted in this case is complete omission of the argument, // which implies "lsl #0". multiclass lsl_aliases -{ + RegisterClass GPR_Rn, RegisterClass GPR_Rm> { def : InstAlias; @@ -490,8 +479,7 @@ defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; // CMP unfortunately has to be different because the instruction doesn't have a // dest register. multiclass cmp_lsl_aliases -{ + RegisterClass GPR_Rn, RegisterClass GPR_Rm> { def : InstAlias; @@ -547,16 +535,13 @@ defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>; // should be parsed: there was no way to accommodate an "lsl #12". let ParserMethod = "ParseImmWithLSLOperand", - RenderMethod = "addImmWithLSLOperands" in -{ + RenderMethod = "addImmWithLSLOperands" in { // Derived PredicateMethod fields are different for each - def addsubimm_lsl0_asmoperand : AsmOperandClass - { + def addsubimm_lsl0_asmoperand : AsmOperandClass { let Name = "AddSubImmLSL0"; } - def addsubimm_lsl12_asmoperand : AsmOperandClass - { + def addsubimm_lsl12_asmoperand : AsmOperandClass { let Name = "AddSubImmLSL12"; } } @@ -574,12 +559,10 @@ def neg_XFORM : SDNodeXForm; -multiclass addsub_imm_operands -{ +multiclass addsub_imm_operands { let PrintMethod = "printAddSubImmLSL0Operand", EncoderMethod = "getAddSubImmOpValue", - ParserMatchClass = addsubimm_lsl0_asmoperand in - { + ParserMatchClass = addsubimm_lsl0_asmoperand in { def _posimm_lsl0 : Operand, ImmLeaf= 0 && (Imm & ~0xfff) == 0; }]>; def _negimm_lsl0 : Operand, @@ -589,8 +572,7 @@ multiclass addsub_imm_operands let PrintMethod = "printAddSubImmLSL12Operand", EncoderMethod = "getAddSubImmOpValue", - ParserMatchClass = addsubimm_lsl12_asmoperand in - { + ParserMatchClass = addsubimm_lsl12_asmoperand in { def _posimm_lsl12 : Operand, ImmLeaf= 0 && (Imm & ~0xfff000) == 0; }], shr_12_XFORM>; @@ -609,8 +591,7 @@ multiclass addsubimm_varieties shift, string asmop, string cmpasmop, Operand imm_operand, Operand cmp_imm_operand, RegisterClass GPR, RegisterClass GPRsp, - AArch64Reg ZR> -{ + AArch64Reg ZR> { // All registers for non-S variants allow SP def _s : A64I_addsubimm shift, !strconcat(cmpasmop, " $Rn, $Imm12"), [(set NZCV, (A64cmp GPRsp:$Rn, cmp_imm_operand:$Imm12))], - NoItinerary> - { + NoItinerary> { let Rd = 0b11111; let Defs = [NZCV]; let isCompare = 1; @@ -650,8 +630,7 @@ multiclass addsubimm_varieties shift, multiclass addsubimm_shifts -{ + RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR> { defm _lsl0 : addsubimm_varieties(operand # "_lsl0"), @@ -682,8 +661,7 @@ defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp", 
"addsubimm_operand_i64_posimm", GPR64, GPR64xsp, XZR>; -multiclass MOVsp -{ +multiclass MOVsp { def _fromsp : InstAlias<"mov $Rd, $Rn", (addop GPRsp:$Rd, SP:$Rn, 0), 0b1>; @@ -706,10 +684,8 @@ defm MOVww : MOVsp; // 1. The "shifed register" operands. Shared with logical insts. //===------------------------------- -multiclass shift_operands -{ - def _asmoperand_i32 : AsmOperandClass - { +multiclass shift_operands { + def _asmoperand_i32 : AsmOperandClass { let Name = "Shift" # form # "i32"; let RenderMethod = "addShiftOperands"; let PredicateMethod @@ -718,24 +694,21 @@ multiclass shift_operands // Note that the operand type is intentionally i64 because the DAGCombiner // puts these into a canonical form. - def _i32 : Operand, ImmLeaf= 0 && Imm <= 31; }]> - { + def _i32 : Operand, ImmLeaf= 0 && Imm <= 31; }]> { let ParserMatchClass = !cast(prefix # "_asmoperand_i32"); let PrintMethod = "printShiftOperand"; let DecoderMethod = "Decode32BitShiftOperand"; } - def _asmoperand_i64 : AsmOperandClass - { + def _asmoperand_i64 : AsmOperandClass { let Name = "Shift" # form # "i64"; let RenderMethod = "addShiftOperands"; let PredicateMethod = "isShift"; } - def _i64 : Operand, ImmLeaf= 0 && Imm <= 63; }]> - { + def _i64 : Operand, ImmLeaf= 0 && Imm <= 63; }]> { let ParserMatchClass = !cast(prefix # "_asmoperand_i64"); let PrintMethod = "printShiftOperand"; @@ -758,8 +731,7 @@ defm ror_operand : shift_operands<"ror_operand", "ROR">; // when the revolution comes. multiclass addsub_shifts defs> -{ + RegisterClass GPR, list defs> { let isCommutable = commutable, Defs = defs in { def _lsl : A64I_addsubshift defs> -{ + list defs> { defm xxx : addsub_shifts; defm www : addsub_shifts; //===------------------------------- multiclass neg_alias -{ + Register ZR, Operand shift_operand, SDNode shiftop> { def : InstAlias<"neg $Rd, $Rm, $Imm6", (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; @@ -867,8 +837,7 @@ def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; multiclass cmp_shifts -{ + RegisterClass GPR> { let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in { def _lsl : A64I_addsubshift; //===----------------------------------------------------------------------===// // Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS -multiclass A64I_addsubcarrySizes -{ - let Uses = [NZCV] in - { +multiclass A64I_addsubcarrySizes { + let Uses = [NZCV] in { def www : A64I_addsubcarry<0b0, op, s, 0b000000, (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), @@ -936,17 +903,14 @@ multiclass A64I_addsubcarrySizes } } -let isCommutable = 1 in -{ +let isCommutable = 1 in { defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">; } defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">; -let Defs = [NZCV] in -{ - let isCommutable = 1 in - { +let Defs = [NZCV] in { + let isCommutable = 1 in { defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">; } @@ -988,23 +952,20 @@ def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>; // 1. 
The architectural BFM instructions //===------------------------------- -def uimm5_asmoperand : AsmOperandClass -{ +def uimm5_asmoperand : AsmOperandClass { let Name = "UImm5"; let PredicateMethod = "isUImm<5>"; let RenderMethod = "addImmOperands"; } -def uimm6_asmoperand : AsmOperandClass -{ +def uimm6_asmoperand : AsmOperandClass { let Name = "UImm6"; let PredicateMethod = "isUImm<6>"; let RenderMethod = "addImmOperands"; } def bitfield32_imm : Operand, - ImmLeaf= 0 && Imm < 32; }]> -{ + ImmLeaf= 0 && Imm < 32; }]> { let ParserMatchClass = uimm5_asmoperand; let DecoderMethod = "DecodeBitfield32ImmOperand"; @@ -1012,28 +973,24 @@ def bitfield32_imm : Operand, def bitfield64_imm : Operand, - ImmLeaf= 0 && Imm < 64; }]> -{ + ImmLeaf= 0 && Imm < 64; }]> { let ParserMatchClass = uimm6_asmoperand; // Default decoder works in 64-bit case: the 6-bit field can take any value. } -multiclass A64I_bitfieldSizes opc, string asmop> -{ +multiclass A64I_bitfieldSizes opc, string asmop> { def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> - { + [], NoItinerary> { let DecoderMethod = "DecodeBitfieldInstruction"; } def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> - { + [], NoItinerary> { let DecoderMethod = "DecodeBitfieldInstruction"; } } @@ -1046,8 +1003,7 @@ defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; def BFMwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> -{ + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1055,8 +1011,7 @@ def BFMwwii : def BFMxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> -{ + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1078,8 +1033,7 @@ class A64I_bf_ext opc, RegisterClass GPRDest, string asmop, : A64I_bitfield -{ + [(set GPRDest:$Rd, pattern)], NoItinerary> { let ImmR = 0b000000; let ImmS = imms; } @@ -1103,8 +1057,7 @@ def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15, // The 64-bit unsigned variants are not strictly architectural but recommended // for consistency. -let isAsmParserOnly = 1 in -{ +let isAsmParserOnly = 1 in { def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7, (and (anyext GPR32:$Rn), 255)>; def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15, @@ -1129,14 +1082,12 @@ def : Pat<(sext_inreg GPR64:$Rn, i32), // These also handle their own decoding because ImmS being set makes // them take precedence over BFM. 
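The shift definitions that follow pin ImmS and vary only ImmR because the architecture specifies immediate shifts as UBFM/SBFM aliases. A small sketch of the 32-bit mapping, assumed from those architectural aliases (the 64-bit case substitutes 63 and modulo 64; ASR is the same shape as LSR but via SBFM):

#include <cassert>

struct BFMFields { unsigned ImmR, ImmS; };

BFMFields lsr32(unsigned Sh) {   // lsr wd, wn, #sh == ubfm wd, wn, #sh, #31
  assert(Sh < 32);
  return {Sh, 31};               // hence the fixed "let ImmS = 31" below
}

BFMFields lsl32(unsigned Sh) {   // lsl wd, wn, #sh
  assert(Sh < 32);               //   == ubfm wd, wn, #((32-sh)%32), #(31-sh)
  return {(32 - Sh) % 32, 31 - Sh}; // the rotated form that
}                                   // getBitfield32LSLOpValue encodes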
-multiclass A64I_shift opc, string asmop, SDNode opnode> -{ +multiclass A64I_shift opc, string asmop, SDNode opnode> { def wwi : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))], - NoItinerary> - { + NoItinerary> { let ImmS = 31; } @@ -1144,8 +1095,7 @@ multiclass A64I_shift opc, string asmop, SDNode opnode> (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))], - NoItinerary> - { + NoItinerary> { let ImmS = 63; } @@ -1170,15 +1120,13 @@ defm LSR : A64I_shift<0b10, "lsr", srl>; // outweighed the benefits in this case (custom asmparser, printer and selection // vs custom encoder). def bitfield32_lsl_imm : Operand, - ImmLeaf= 0 && Imm <= 31; }]> -{ + ImmLeaf= 0 && Imm <= 31; }]> { let ParserMatchClass = uimm5_asmoperand; let EncoderMethod = "getBitfield32LSLOpValue"; } def bitfield64_lsl_imm : Operand, - ImmLeaf= 0 && Imm <= 63; }]> -{ + ImmLeaf= 0 && Imm <= 63; }]> { let ParserMatchClass = uimm6_asmoperand; let EncoderMethod = "getBitfield64LSLOpValue"; } @@ -1187,8 +1135,7 @@ class A64I_bitfield_lsl : A64I_bitfield -{ + NoItinerary> { bits<12> FullImm; let ImmR = FullImm{5-0}; let ImmS = FullImm{11-6}; @@ -1205,41 +1152,35 @@ def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>; // 5. Aliases for bitfield extract instructions //===------------------------------- -def bfx32_width_asmoperand : AsmOperandClass -{ +def bfx32_width_asmoperand : AsmOperandClass { let Name = "BFX32Width"; let PredicateMethod = "isBitfieldWidth<32>"; let RenderMethod = "addBFXWidthOperands"; } -def bfx32_width : Operand, ImmLeaf -{ +def bfx32_width : Operand, ImmLeaf { let PrintMethod = "printBFXWidthOperand"; let ParserMatchClass = bfx32_width_asmoperand; } -def bfx64_width_asmoperand : AsmOperandClass -{ +def bfx64_width_asmoperand : AsmOperandClass { let Name = "BFX64Width"; let PredicateMethod = "isBitfieldWidth<64>"; let RenderMethod = "addBFXWidthOperands"; } -def bfx64_width : Operand -{ +def bfx64_width : Operand { let PrintMethod = "printBFXWidthOperand"; let ParserMatchClass = bfx64_width_asmoperand; } -multiclass A64I_bitfield_extract opc, string asmop, SDNode op> -{ +multiclass A64I_bitfield_extract opc, string asmop, SDNode op> { def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary> - { + NoItinerary> { // As above, no disassembler allowed. let isAsmParserOnly = 1; } @@ -1248,8 +1189,7 @@ multiclass A64I_bitfield_extract opc, string asmop, SDNode op> (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary> - { + NoItinerary> { // As above, no disassembler allowed. let isAsmParserOnly = 1; } @@ -1261,8 +1201,7 @@ defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; // Again, variants based on BFM modify Rd so need it as an input too. def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> -{ + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1270,8 +1209,7 @@ def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> -{ + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1285,34 +1223,33 @@ def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)), // UBFX makes sense as an implementation of a 64-bit zero-extension too. Could // use either 64-bit or 32-bit variant, but 32-bit might be more efficient. -def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31), sub_32)>; +def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31), + sub_32)>; //===------------------------------- // 6. Aliases for bitfield insert instructions //===------------------------------- -def bfi32_lsb_asmoperand : AsmOperandClass -{ +def bfi32_lsb_asmoperand : AsmOperandClass { let Name = "BFI32LSB"; let PredicateMethod = "isUImm<5>"; let RenderMethod = "addBFILSBOperands<32>"; } -def bfi32_lsb : Operand, ImmLeaf= 0 && Imm <= 31; }]> -{ +def bfi32_lsb : Operand, + ImmLeaf= 0 && Imm <= 31; }]> { let PrintMethod = "printBFILSBOperand<32>"; let ParserMatchClass = bfi32_lsb_asmoperand; } -def bfi64_lsb_asmoperand : AsmOperandClass -{ +def bfi64_lsb_asmoperand : AsmOperandClass { let Name = "BFI64LSB"; let PredicateMethod = "isUImm<6>"; let RenderMethod = "addBFILSBOperands<64>"; } -def bfi64_lsb : Operand, ImmLeaf= 0 && Imm <= 63; }]> -{ +def bfi64_lsb : Operand, + ImmLeaf= 0 && Imm <= 63; }]> { let PrintMethod = "printBFILSBOperand<64>"; let ParserMatchClass = bfi64_lsb_asmoperand; } @@ -1320,41 +1257,35 @@ def bfi64_lsb : Operand, ImmLeaf= 0 && Imm <= 63; }]> // Width verification is performed during conversion so width operand can be // shared between 32/64-bit cases. Still needed for the print method though // because ImmR encodes "width - 1". -def bfi32_width_asmoperand : AsmOperandClass -{ +def bfi32_width_asmoperand : AsmOperandClass { let Name = "BFI32Width"; let PredicateMethod = "isBitfieldWidth<32>"; let RenderMethod = "addBFIWidthOperands"; } def bfi32_width : Operand, - ImmLeaf= 1 && Imm <= 32; }]> -{ + ImmLeaf= 1 && Imm <= 32; }]> { let PrintMethod = "printBFIWidthOperand"; let ParserMatchClass = bfi32_width_asmoperand; } -def bfi64_width_asmoperand : AsmOperandClass -{ +def bfi64_width_asmoperand : AsmOperandClass { let Name = "BFI64Width"; let PredicateMethod = "isBitfieldWidth<64>"; let RenderMethod = "addBFIWidthOperands"; } def bfi64_width : Operand, - ImmLeaf= 1 && Imm <= 64; }]> -{ + ImmLeaf= 1 && Imm <= 64; }]> { let PrintMethod = "printBFIWidthOperand"; let ParserMatchClass = bfi64_width_asmoperand; } -multiclass A64I_bitfield_insert opc, string asmop> -{ +multiclass A64I_bitfield_insert opc, string asmop> { def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> - { + [], NoItinerary> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; } @@ -1362,12 +1293,10 @@ multiclass A64I_bitfield_insert opc, string asmop> def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> - { + [], NoItinerary> { // As above, no disassembler allowed. let isAsmParserOnly = 1; } - } defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; @@ -1375,18 +1304,16 @@ defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> -{ + (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; } def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> -{ + (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1397,8 +1324,7 @@ def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), //===----------------------------------------------------------------------===// // Contains: CBZ, CBNZ -class label_asmoperand : AsmOperandClass -{ +class label_asmoperand : AsmOperandClass { let Name = "Label" # width # "_" # scale; let PredicateMethod = "isLabel<" # width # "," # scale # ">"; let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; @@ -1408,8 +1334,7 @@ def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; // All conditional immediate branches are the same really: 19 signed bits scaled // by the instruction-size (4). -def bcc_target : Operand -{ +def bcc_target : Operand { // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. let ParserMatchClass = label_wid19_scal4_asmoperand; let PrintMethod = "printLabelOperand<19, 4>"; @@ -1417,8 +1342,7 @@ def bcc_target : Operand let OperandType = "OPERAND_PCREL"; } -multiclass cmpbr_sizes -{ +multiclass cmpbr_sizes { let isBranch = 1, isTerminator = 1 in { def x : A64I_cmpbr<0b1, op, (outs), @@ -1448,15 +1372,13 @@ defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf, ImmLeaf= 0 && Imm <= 15; -}]> -{ +}]> { let PrintMethod = "printCondCodeOperand"; let ParserMatchClass = cond_code_asmoperand; } @@ -1464,8 +1386,7 @@ def cond_code : Operand, ImmLeaf -{ + NoItinerary> { let Uses = [NZCV]; let isBranch = 1; let isTerminator = 1; @@ -1476,36 +1397,31 @@ def Bcc : A64I_condbr<0b0, 0b0, (outs), //===----------------------------------------------------------------------===// // Contains: CCMN, CCMP -def uimm4_asmoperand : AsmOperandClass -{ +def uimm4_asmoperand : AsmOperandClass { let Name = "UImm4"; let PredicateMethod = "isUImm<4>"; let RenderMethod = "addImmOperands"; } -def uimm4 : Operand -{ +def uimm4 : Operand { let ParserMatchClass = uimm4_asmoperand; } -def uimm5 : Operand -{ +def uimm5 : Operand { let ParserMatchClass = uimm5_asmoperand; } // The only difference between this operand and the one for instructions like // B.cc is that it's parsed manually. The other get parsed implicitly as part of // the mnemonic handling. 
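For reference, the conditional-compare behaviour that the CCMP/CCMN definitions below encode can be modelled in a few lines of C++ (an illustrative model of the architectural semantics, not code from this patch; the register form is shown):

#include <cstdint>

struct NZCV { bool N, Z, C, V; };

// Model of "ccmp rn, rm, #nzcv, cond": if the condition holds, set the flags
// as a flag-setting subtract would; otherwise load the 4-bit immediate
// directly into NZCV (bit 3 = N, bit 2 = Z, bit 1 = C, bit 0 = V).
NZCV ccmp(uint64_t rn, uint64_t rm, unsigned nzcvImm, bool condHolds) {
  if (!condHolds)
    return { (nzcvImm & 8) != 0, (nzcvImm & 4) != 0,
             (nzcvImm & 2) != 0, (nzcvImm & 1) != 0 };
  uint64_t res = rn - rm;
  NZCV f;
  f.N = (int64_t)res < 0;
  f.Z = res == 0;
  f.C = rn >= rm;                               // carry set = no borrow
  f.V = (((rn ^ rm) & (rn ^ res)) >> 63) != 0;  // signed overflow of rn - rm
  return f;
}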
-def cond_code_op_asmoperand : AsmOperandClass -{ +def cond_code_op_asmoperand : AsmOperandClass { let Name = "CondCodeOp"; let RenderMethod = "addCondCodeOperands"; let PredicateMethod = "isCondCode"; let ParserMethod = "ParseCondCodeOperand"; } -def cond_code_op : Operand -{ +def cond_code_op : Operand { let PrintMethod = "printCondCodeOperand"; let ParserMatchClass = cond_code_op_asmoperand; } @@ -1514,8 +1430,7 @@ class A64I_condcmpimmImpl : A64I_condcmpimm -{ + [], NoItinerary> { let Defs = [NZCV]; } @@ -1534,8 +1449,7 @@ class A64I_condcmpregImpl (outs), (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary> -{ + [], NoItinerary> { let Defs = [NZCV]; } @@ -1551,16 +1465,14 @@ def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">; // Condition code which is encoded as the inversion (semantically rather than // bitwise) in the instruction. -def inv_cond_code_op_asmoperand : AsmOperandClass -{ +def inv_cond_code_op_asmoperand : AsmOperandClass { let Name = "InvCondCodeOp"; let RenderMethod = "addInvCondCodeOperands"; let PredicateMethod = "isCondCode"; let ParserMethod = "ParseCondCodeOperand"; } -def inv_cond_code_op : Operand -{ +def inv_cond_code_op : Operand { let ParserMatchClass = inv_cond_code_op_asmoperand; } @@ -1576,10 +1488,8 @@ def inv_cond_code multiclass A64I_condselSizes op2, string asmop, - SDPatternOperator select> -{ - let Uses = [NZCV] in - { + SDPatternOperator select> { + let Uses = [NZCV] in { def wwwc : A64I_condsel<0b0, op, 0b0, op2, (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), @@ -1667,9 +1577,11 @@ def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond), // No commutable pattern for CSEL since the commuted version is isomorphic. 
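The CSINC pattern below relies on a small semantic identity; a hedged sketch of what is being matched (illustrative C++, not LLVM code):

#include <cstdint>

// "csinc rd, rn, rm, cond" computes cond ? rn : rm + 1. The DAG pattern
// below therefore matches a select whose *true* value is (x + 1) and emits
// CSINC with the inverted condition, so that x lands in the Rm slot.
uint64_t csinc(uint64_t rn, uint64_t rm, bool condHolds) {
  return condHolds ? rn : rm + 1;
}

CSINV and CSNEG are handled the same way, with "~rm" and "-rm" in place of "rm + 1".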
// CSINC -def :Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn, inv_cond_code:$Cond), +def :Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn, + inv_cond_code:$Cond), (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>; -def :Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn, inv_cond_code:$Cond), +def :Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn, + inv_cond_code:$Cond), (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>; // CSINV @@ -1763,7 +1675,8 @@ multiclass dp_2src_zext opcode, string asmop, SDPatternOperator op> { def www : dp_2src_impl<0b0, opcode, asmop, - [(set GPR32:$Rd, (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))], + [(set GPR32:$Rd, + (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))], GPR32, NoItinerary>; def xxx : dp_2src_impl<0b1, @@ -1829,8 +1742,7 @@ class A64I_dp3_4operand opcode, RegisterClass AccReg, : A64I_dp3 -{ + [(set AccReg:$Rd, pattern)], NoItinerary> { RegisterClass AccGPR = AccReg; RegisterClass SrcGPR = SrcReg; } @@ -1855,8 +1767,7 @@ def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl", def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl", (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; -let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in -{ +let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in { def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), "umulh\t$Rd, $Rn, $Rm", @@ -1871,8 +1782,7 @@ let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in } multiclass A64I_dp3_3operand -{ + Register ZR, dag pattern> { def : InstAlias; @@ -1890,12 +1800,12 @@ defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>; defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, - (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>; + (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>; defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>; defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, - (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; + (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; //===----------------------------------------------------------------------===// @@ -1903,22 +1813,19 @@ defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, //===----------------------------------------------------------------------===// // Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 -def uimm16_asmoperand : AsmOperandClass -{ +def uimm16_asmoperand : AsmOperandClass { let Name = "UImm16"; let PredicateMethod = "isUImm<16>"; let RenderMethod = "addImmOperands"; } -def uimm16 : Operand -{ +def uimm16 : Operand { let ParserMatchClass = uimm16_asmoperand; } class A64I_exceptImpl opc, bits<2> ll, string asmop> : A64I_exception -{ + !strconcat(asmop, "\t$UImm16"), [], NoItinerary> { let isBranch = 1; let isTerminator = 1; } @@ -1973,38 +1880,34 @@ def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB), //===----------------------------------------------------------------------===// // Contains: FCMP, FCMPE -def fpzero_asmoperand : AsmOperandClass -{ +def fpzero_asmoperand : AsmOperandClass { let Name = "FPZero"; let ParserMethod = "ParseFPImmOperand"; } -def fpz32 : Operand, ComplexPattern -{ +def fpz32 : Operand, + ComplexPattern { let ParserMatchClass = fpzero_asmoperand; let PrintMethod = "printFPZeroOperand"; } -def fpz64 : Operand, ComplexPattern -{ +def fpz64 : Operand, + ComplexPattern { let ParserMatchClass 
= fpzero_asmoperand; let PrintMethod = "printFPZeroOperand"; } multiclass A64I_fpcmpSignal type, bit imm, dag ins, string asmop2, - dag pattern> -{ + dag pattern> { def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2), - [pattern], NoItinerary> - { + [pattern], NoItinerary> { let Defs = [NZCV]; } def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2), - [], NoItinerary> - { + [], NoItinerary> { let Defs = [NZCV]; } } @@ -2016,8 +1919,7 @@ defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm", // What would be Rm should be written as 0, but anything is valid for // disassembly so we can't set the bits -let PostEncoderMethod = "fixFCMPImm" in -{ +let PostEncoderMethod = "fixFCMPImm" in { defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm", (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>; @@ -2036,8 +1938,7 @@ class A64I_fpccmpImpl type, bit op, RegisterClass FPR, string asmop> (outs), (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary> -{ + [], NoItinerary> { let Defs = [NZCV]; } @@ -2051,8 +1952,7 @@ def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">; //===----------------------------------------------------------------------===// // Contains: FCSEL -let Uses = [NZCV] in -{ +let Uses = [NZCV] in { def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond), "fcsel\t$Rd, $Rn, $Rm, $Cond", @@ -2082,8 +1982,7 @@ def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val), // First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d" // syntax. Default to no pattern because most are odd enough not to have one. multiclass A64I_fpdp1sizes opcode, string asmstr, - SDPatternOperator opnode = FPNoUnop> -{ + SDPatternOperator opnode = FPNoUnop> { def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn), !strconcat(asmstr, "\t$Rd, $Rn"), [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))], @@ -2111,8 +2010,7 @@ defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>; // The FCVT instrucitons have different source and destination register-types, // but the fields are uniform everywhere a D-register (say) crops up. Package // this information in a Record. -class FCVTRegType fld, ValueType vt> -{ +class FCVTRegType fld, ValueType vt> { RegisterClass Class = rc; ValueType VT = vt; bit t1 = fld{1}; @@ -2148,8 +2046,7 @@ def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs), [{ (void)N; return false; }]>; multiclass A64I_fpdp2sizes opcode, string asmstr, - SDPatternOperator opnode> -{ + SDPatternOperator opnode> { def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm), @@ -2219,16 +2116,14 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; // Contains: FCVTZS, FCVTZU, SCVTF, UCVTF // #1-#32 allowed, encoded as "64 - -def fixedpos_asmoperand_i32 : AsmOperandClass -{ +def fixedpos_asmoperand_i32 : AsmOperandClass { let Name = "CVTFixedPos32"; let RenderMethod = "addCVTFixedPosOperands"; let PredicateMethod = "isCVTFixedPos<32>"; } // Also encoded as "64 - " but #1-#64 allowed. 
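A worked example of the "64 - fbits" encoding rule described above (illustrative C++ only; the helper name is hypothetical):

#include <cassert>

// The scale field stores 64 minus the requested number of fractional bits,
// so #1..#64 (and #1..#32 for the 32-bit form) all get distinct encodings.
unsigned encodeFixedPos(unsigned fbits) { return 64 - fbits; }

int main() {
  assert(encodeFixedPos(1) == 63);   // e.g. fcvtzs x0, d0, #1
  assert(encodeFixedPos(64) == 0);   // e.g. fcvtzs x0, d0, #64
}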
-def fixedpos_asmoperand_i64 : AsmOperandClass -{ +def fixedpos_asmoperand_i64 : AsmOperandClass { let Name = "CVTFixedPos64"; let RenderMethod = "addCVTFixedPosOperands"; let PredicateMethod = "isCVTFixedPos<64>"; @@ -2240,8 +2135,7 @@ def fixedpos_asmoperand_i64 : AsmOperandClass // + Assembly parsing and decoding depend on integer width class cvtfix_i32_op : Operand, - ComplexPattern", [fpimm]> -{ + ComplexPattern", [fpimm]> { let ParserMatchClass = fixedpos_asmoperand_i32; let DecoderMethod = "DecodeCVT32FixedPosOperand"; let PrintMethod = "printCVTFixedPosOperand"; @@ -2249,8 +2143,7 @@ class cvtfix_i32_op class cvtfix_i64_op : Operand, - ComplexPattern", [fpimm]> -{ + ComplexPattern", [fpimm]> { let ParserMatchClass = fixedpos_asmoperand_i64; let PrintMethod = "printCVTFixedPosOperand"; } @@ -2322,17 +2215,24 @@ class A64I_fpintI type, bits<2> rmode, bits<3> opcode, : A64I_fpint; -multiclass A64I_fptointRM rmode, bit o2, string asmop> -{ - def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, GPR32, FPR32, asmop # "s">; - def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, GPR64, FPR32, asmop # "s">; - def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, GPR32, FPR32, asmop # "u">; - def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, GPR64, FPR32, asmop # "u">; - - def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, GPR32, FPR64, asmop # "s">; - def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, GPR64, FPR64, asmop # "s">; - def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, GPR32, FPR64, asmop # "u">; - def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, GPR64, FPR64, asmop # "u">; +multiclass A64I_fptointRM rmode, bit o2, string asmop> { + def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, + GPR32, FPR32, asmop # "s">; + def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, + GPR64, FPR32, asmop # "s">; + def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, + GPR32, FPR32, asmop # "u">; + def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, + GPR64, FPR32, asmop # "u">; + + def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, + GPR32, FPR64, asmop # "s">; + def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, + GPR64, FPR64, asmop # "s">; + def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, + GPR32, FPR64, asmop # "u">; + def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, + GPR64, FPR64, asmop # "u">; } defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">; @@ -2350,8 +2250,7 @@ def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>; def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>; def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>; -multiclass A64I_inttofp -{ +multiclass A64I_inttofp { def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>; def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>; @@ -2380,20 +2279,17 @@ def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>; def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>; def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>; -def lane1_asmoperand : AsmOperandClass -{ +def lane1_asmoperand : AsmOperandClass { let Name = "Lane1"; let RenderMethod = "addImmOperands"; } -def lane1 : Operand -{ +def lane1 : Operand { let ParserMatchClass = lane1_asmoperand; let PrintMethod = "printBareImmOperand"; } -let DecoderMethod = "DecodeFMOVLaneInstruction" in -{ +let DecoderMethod = "DecodeFMOVLaneInstruction" in { def FMOVxv : A64I_fpint<0b1, 0b0, 
0b10, 0b01, 0b110, (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane), "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>; @@ -2414,8 +2310,7 @@ def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", //===----------------------------------------------------------------------===// // Contains: FMOV -def fpimm_asmoperand : AsmOperandClass -{ +def fpimm_asmoperand : AsmOperandClass { let Name = "FMOVImm"; let ParserMethod = "ParseFPImmOperand"; } @@ -2430,8 +2325,7 @@ def SDXF_fpimm : SDNodeXForm : Operand, PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }], - SDXF_fpimm> -{ + SDXF_fpimm> { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = fpimm_asmoperand; } @@ -2456,14 +2350,12 @@ def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; //===----------------------------------------------------------------------===// // Contains: LDR, LDRSW, PRFM -def ldrlit_label_asmoperand : AsmOperandClass -{ +def ldrlit_label_asmoperand : AsmOperandClass { let Name = "LoadLitLabel"; let RenderMethod = "addLabelOperands<19, 4>"; } -def ldrlit_label : Operand -{ +def ldrlit_label : Operand { let EncoderMethod = "getLoadLitLabelOpValue"; // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. @@ -2475,18 +2367,15 @@ def ldrlit_label : Operand // Various instructions take an immediate value (which can always be used), // where some numbers have a symbolic name to make things easier. These operands // and the associated functions abstract away the differences. -multiclass namedimm -{ - def _asmoperand : AsmOperandClass - { +multiclass namedimm { + def _asmoperand : AsmOperandClass { let Name = "NamedImm" # prefix; let PredicateMethod = "isUImm"; let RenderMethod = "addImmOperands"; let ParserMethod = "ParseNamedImmOperand<" # mapper # ">"; } - def _op : Operand - { + def _op : Operand { let ParserMatchClass = !cast(prefix # "_asmoperand"); let PrintMethod = "printNamedImmOperand<" # mapper # ">"; let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">"; @@ -2500,8 +2389,7 @@ class A64I_LDRlitSimple opc, bit v, RegisterClass OutReg, : A64I_LDRlit; -let mayLoad = 1 in -{ +let mayLoad = 1 in { def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; } @@ -2511,8 +2399,7 @@ def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32, def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64, [(set (f64 FPR64:$Rt), (load constpool:$Imm19))]>; -let mayLoad = 1 in -{ +let mayLoad = 1 in { def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; @@ -2548,16 +2435,14 @@ let mayLoad = 1 in // This operand parses a GPR64xsp register, followed by an optional immediate // #0. 
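The operand just described feeds the load/store-exclusive definitions that follow. For orientation, a compare-and-swap loop built from these instructions can be sketched via the standard builtin (illustrative C++ only; on AArch64 of this era it compiles to an LDAXR/STLXR retry loop):

#include <cstdint>

bool casAcqRel(uint64_t *p, uint64_t expected, uint64_t desired) {
  // Lowered (pre-LSE) to roughly:
  //   retry: ldaxr x8, [x0] ; cmp x8, x1 ; b.ne fail
  //          stlxr w9, x2, [x0] ; cbnz w9, retry
  return __atomic_compare_exchange_n(p, &expected, desired, /*weak=*/false,
                                     __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
}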
-def GPR64xsp0_asmoperand : AsmOperandClass -{ +def GPR64xsp0_asmoperand : AsmOperandClass { let Name = "GPR64xsp0"; let PredicateMethod = "isWrappedReg"; let RenderMethod = "addRegOperands"; let ParserMethod = "ParseLSXAddressOperand"; } -def GPR64xsp0 : RegisterOperand -{ +def GPR64xsp0 : RegisterOperand { let ParserMatchClass = GPR64xsp0_asmoperand; } @@ -2568,7 +2453,7 @@ def GPR64xsp0 : RegisterOperand class A64I_SRexs_impl size, bits<3> opcode, string asm, dag outs, dag ins, list pat, InstrItinClass itin> : - A64I_LDSTex_stn ; class A64I_LRexs_impl size, bits<3> opcode, string asm, dag outs, dag ins, list pat, InstrItinClass itin> : - A64I_LDSTex_tn ; class A64I_SLexs_impl size, bits<3> opcode, string asm, dag outs, dag ins, list pat, InstrItinClass itin> : - A64I_LDSTex_tn ; class A64I_SPexs_impl size, bits<3> opcode, string asm, dag outs, dag ins, list pat, InstrItinClass itin> : - A64I_LDSTex_stt2n -{ + pat, itin> { let mayStore = 1; } @@ -2740,7 +2624,7 @@ defm STLXP : A64I_SPex<"stlxp", 0b011>; class A64I_LPexs_impl size, bits<3> opcode, string asm, dag outs, dag ins, list pat, InstrItinClass itin> : - A64I_LDSTex_tt2n ; // 1.1 Unsigned 12-bit immediate operands //===------------------------------- -multiclass offsets_uimm12 -{ - def uimm12_asmoperand : AsmOperandClass - { +multiclass offsets_uimm12 { + def uimm12_asmoperand : AsmOperandClass { let Name = "OffsetUImm12_" # MemSize; let PredicateMethod = "isOffsetUImm12<" # MemSize # ">"; let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">"; @@ -2840,8 +2722,7 @@ multiclass offsets_uimm12 // Pattern is really no more than an ImmLeaf, but predicated on MemSize which // complicates things beyond TableGen's ken. def uimm12 : Operand, - ComplexPattern"> - { + ComplexPattern"> { let ParserMatchClass = !cast(prefix # uimm12_asmoperand); @@ -2866,8 +2747,7 @@ def SDXF_simm9 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32); }]>; -def simm9_asmoperand : AsmOperandClass -{ +def simm9_asmoperand : AsmOperandClass { let Name = "SImm9"; let PredicateMethod = "isSImm<9>"; let RenderMethod = "addSImmOperands<9>"; @@ -2875,8 +2755,7 @@ def simm9_asmoperand : AsmOperandClass def simm9 : Operand, ImmLeaf= -0x100 && Imm <= 0xff; }], - SDXF_simm9> -{ + SDXF_simm9> { let PrintMethod = "printOffsetSImm9Operand"; let ParserMatchClass = simm9_asmoperand; } @@ -2899,17 +2778,14 @@ def simm9 : Operand, // which will need separate instructions for LLVM type-consistency. We'll also // need separate operands, of course. multiclass regexts -{ - def regext_asmoperand : AsmOperandClass - { + string Rm, string prefix> { + def regext_asmoperand : AsmOperandClass { let Name = "AddrRegExtend_" # MemSize # "_" # Rm; let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">"; let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; } - def regext : Operand - { + def regext : Operand { let PrintMethod = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">"; @@ -2919,8 +2795,7 @@ multiclass regexts -{ +multiclass regexts_wx { // Rm is an X-register if LSL or SXTX are specified as the shift. defm Xm_ : regexts; @@ -2959,8 +2834,7 @@ defm qword_ : regexts_wx<16, "qword_">; // This class covers the basic unsigned or irrelevantly-signed loads and stores, // to general-purpose and floating-point registers. 
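Those loads and stores use the two immediate offset forms defined above, whose legality checks differ; a minimal sketch (illustrative C++ mirroring what the named PredicateMethod hooks must check, not their actual implementations):

#include <cstdint>

// Scaled 12-bit unsigned offset (LDR/STR): the byte offset must be a
// multiple of the access size and at most 4095 units of that size.
bool fitsOffsetUImm12(uint64_t byteOff, unsigned memSize) {
  return byteOff % memSize == 0 && byteOff / memSize <= 4095;
}

// Unscaled 9-bit signed offset (LDUR/STUR): any byte offset in [-256, 255].
bool fitsSImm9(int64_t byteOff) {
  return byteOff >= -256 && byteOff <= 255;
}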
-class AddrParams -{ +class AddrParams { Operand uimm12 = !cast(prefix # "_uimm12"); Operand regextWm = !cast(prefix # "_Wm_regext"); @@ -2975,14 +2849,12 @@ def qword_addrparams : AddrParams<"qword">; multiclass A64I_LDRSTR_unsigned size, bit v, bit high_opc, string asmsuffix, - RegisterClass GPR, AddrParams params> -{ + RegisterClass GPR, AddrParams params> { // Unsigned immediate def _STR : A64I_LSunsigimm - { + [], NoItinerary> { let mayStore = 1; } def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", @@ -2991,16 +2863,14 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _LDR : A64I_LSunsigimm - { + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", (!cast(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; // Register offset (four of these: load/store and Wm/Xm). - let mayLoad = 1 in - { + let mayLoad = 1 in { def _Wm_RegOffset_LDR : A64I_LSregoff size, bit v, (!cast(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; - let mayStore = 1 in - { + let mayStore = 1 in { def _Wm_RegOffset_STR : A64I_LSregoff size, bit v, def _STUR : A64I_LSunalimm - { + [], NoItinerary> { let mayStore = 1; } def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", @@ -3049,8 +2917,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _LDUR : A64I_LSunalimm - { + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", @@ -3061,8 +2928,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR64xsp:$Rn_wb), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> - { + [], NoItinerary> { let Constraints = "$Rn = $Rn_wb"; let mayStore = 1; @@ -3074,8 +2940,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> - { + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3086,8 +2951,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR64xsp:$Rn_wb), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> - { + [], NoItinerary> { let Constraints = "$Rn = $Rn_wb"; let mayStore = 1; @@ -3099,8 +2963,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> - { + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3141,7 +3004,8 @@ defm LSFP64 : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; // STR/LDR to/from a Q register defm LSFP128 - : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, qword_addrparams>; + : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, + qword_addrparams>; //===------------------------------ // 2.3 Signed loads @@ -3151,15 +3015,13 @@ defm LSFP128 // so it's worth factoring out. Signed word loads don't fit because there is no // W version. 
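To illustrate the asymmetry just noted (plain C++, illustrative only): each narrow "ldrs" load sign-extends into either register width, but the word-sized one only has an X form:

#include <cstdint>

int32_t ldrsb_w(const int8_t *p)  { return *p; }  // LDRSBw: byte -> W register
int64_t ldrsb_x(const int8_t *p)  { return *p; }  // LDRSBx: byte -> X register
int64_t ldrsw_x(const int32_t *p) { return *p; }  // LDRSWx: word -> X only; a
                                                  // "ldrsw to W" would just be
                                                  // an ordinary 32-bit LDR.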
multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, - string prefix> -{ + string prefix> { // Unsigned offset def w : A64I_LSunsigimm - { + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", @@ -3169,16 +3031,14 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR64:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12), "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> - { + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", (!cast(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; // Register offset - let mayLoad = 1 in - { + let mayLoad = 1 in { def w_Wm_RegOffset : A64I_LSregoff size, string asmopcode, AddrParams params, GPR64:$Rm, 2)>; - let mayLoad = 1 in - { + let mayLoad = 1 in { // Unaligned offset def w_U : A64I_LSunalimm size, string asmopcode, AddrParams params, (outs GPR32:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> - { + [], NoItinerary> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3243,8 +3101,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> - { + [], NoItinerary> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3254,8 +3111,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR32:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> - { + [], NoItinerary> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3264,8 +3120,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> - { + [], NoItinerary> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3283,14 +3138,12 @@ def LDRSWx (outs GPR64:$Rt), (ins GPR64xsp:$Rn, word_uimm12:$UImm12), "ldrsw\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; -let mayLoad = 1 in -{ +let mayLoad = 1 in { def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, (outs GPR64:$Rt), (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), @@ -3312,8 +3165,7 @@ def LDURSWx (outs GPR64:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldursw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; @@ -3323,8 +3175,7 @@ def LDRSWx_PostInd (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn], $SImm9", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3334,8 +3185,7 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3348,15 +3198,13 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, 
def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), "prfm\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"prfm $Rt, [$Rn]", (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; -let mayLoad = 1 in -{ +let mayLoad = 1 in { def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), (ins prefetch_op:$Rt, GPR64xsp:$Rn, GPR32:$Rm, dword_Wm_regext:$Ext), @@ -3377,8 +3225,7 @@ def : InstAlias<"prfm $Rt, [$Rn, $Rm]", def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "prfum\t$Rt, [$Rn, $SImm9]", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"prfum $Rt, [$Rn]", @@ -3394,13 +3241,11 @@ def : InstAlias<"prfum $Rt, [$Rn]", // section to avoid instantiation of "ldtr d0, [sp]" etc. multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, - string prefix> -{ + string prefix> { def _UnPriv_STR : A64I_LSunpriv - { + [], NoItinerary> { let mayStore = 1; } @@ -3410,8 +3255,7 @@ multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, def _UnPriv_LDR : A64I_LSunpriv - { + [], NoItinerary> { let mayLoad = 1; } @@ -3434,10 +3278,8 @@ defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; // Now a class for the signed instructions that can go to either 32 or 64 // bits... -multiclass A64I_LDTR_signed size, string asmopcode, string prefix> -{ - let mayLoad = 1 in - { +multiclass A64I_LDTR_signed size, string asmopcode, string prefix> { + let mayLoad = 1 in { def w : A64I_LSunpriv -{ + [], NoItinerary> { let mayLoad = 1; } def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; @@ -3507,20 +3348,17 @@ def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; // Operands for each access size. This multiclass takes care of instantiating // the correct template functions in the rest of the backend. -multiclass offsets_simm7 -{ +multiclass offsets_simm7 { // The bare signed 7-bit immediate is used in post-indexed instructions, but // because of the scaling performed a generic "simm7" operand isn't // appropriate here either. 
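Concretely, the scaling works as follows (illustrative C++ mirroring what an isSImm7Scaled-style predicate must check, not its actual implementation):

#include <cstdint>

// Load/store-pair offsets are a signed 7-bit count of access-size units, so
// the byte offset must be size-aligned and within [-64, 63] units.
bool fitsSImm7Scaled(int64_t byteOff, unsigned memSize) {
  if (byteOff % memSize != 0)
    return false;
  int64_t units = byteOff / memSize;
  return units >= -64 && units <= 63;
}
// e.g. "ldp x0, x1, [sp, #-16]": memSize 8, units -2 -> encodable.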
- def simm7_asmoperand : AsmOperandClass - { + def simm7_asmoperand : AsmOperandClass { let Name = "SImm7_Scaled" # MemSize; let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; } - def simm7 : Operand - { + def simm7 : Operand { let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; let ParserMatchClass = !cast(prefix # "simm7_asmoperand"); } @@ -3531,12 +3369,10 @@ defm dword_ : offsets_simm7<"8", "dword_">; defm qword_ : offsets_simm7<"16", "qword_">; multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, - Operand simm7, string prefix> -{ + Operand simm7, string prefix> { def _STR : A64I_LSPoffset - { + "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { let mayStore = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3547,8 +3383,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, def _LDR : A64I_LSPoffset - { + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3562,8 +3397,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, GPR64xsp:$Rn, simm7:$SImm7), "stp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> - { + [], NoItinerary> { let mayStore = 1; let Constraints = "$Rn = $Rn_wb"; @@ -3575,8 +3409,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> - { + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3585,8 +3418,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, def _PreInd_STR : A64I_LSPpreind - { + [], NoItinerary> { let mayStore = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3596,8 +3428,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary> - { + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3605,8 +3436,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, def _NonTemp_STR : A64I_LSPnontemp - { + "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { let mayStore = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3617,8 +3447,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, def _NonTemp_LDR : A64I_LSPnontemp - { + "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3633,14 +3462,14 @@ defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; -defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, "LSFPPair128">; +defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, + "LSFPPair128">; def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> -{ + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3651,8 +3480,7 @@ def LDPSWx_PostInd : 
A64I_LSPpostind<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3662,8 +3490,7 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary> -{ + [], NoItinerary> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3675,18 +3502,15 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, // Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV multiclass logical_imm_operands -{ - def _asmoperand : AsmOperandClass - { + int size, ValueType VT> { + def _asmoperand : AsmOperandClass { let Name = "LogicalImm" # note # size; let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; let RenderMethod = "addLogicalImmOperands<" # size # ">"; } def _operand - : Operand, ComplexPattern - { + : Operand, ComplexPattern { let ParserMatchClass = !cast(prefix # "_asmoperand"); let PrintMethod = "printLogicalImmOperand<" # size # ">"; let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; @@ -3704,8 +3528,7 @@ defm logical_imm64_mov : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; -multiclass A64I_logimmSizes opc, string asmop, SDNode opnode> -{ +multiclass A64I_logimmSizes opc, string asmop, SDNode opnode> { def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), (ins GPR32:$Rn, logical_imm32_operand:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), @@ -3725,8 +3548,7 @@ defm AND : A64I_logimmSizes<0b00, "and", and>; defm ORR : A64I_logimmSizes<0b01, "orr", or>; defm EOR : A64I_logimmSizes<0b10, "eor", xor>; -let Defs = [NZCV] in -{ +let Defs = [NZCV] in { def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), (ins GPR32:$Rn, logical_imm32_operand:$Imm), "ands\t$Rd, $Rn, $Imm", @@ -3770,8 +3592,7 @@ def signed_cond : PatLeaf<(cond), [{ multiclass logical_shifts opc, bit N, bit commutable, string asmop, SDPatternOperator opfrag, string sty, - RegisterClass GPR, list defs> -{ + RegisterClass GPR, list defs> { let isCommutable = commutable, Defs = defs in { def _lsl : A64I_logicalshift opc, multiclass logical_sizes opc, bit N, bit commutable, string asmop, SDPatternOperator opfrag, - list defs> -{ + list defs> { defm xxx : logical_shifts; defm www : logical_shifts, [NZCV]>; -multiclass tst_shifts -{ +multiclass tst_shifts { let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { def _lsl : A64I_logicalshift; defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>; -multiclass mvn_shifts -{ +multiclass mvn_shifts { let isCommutable = 0, Rn = 0b11111 in { def _lsl : A64I_logicalshift; // A wide variety of different relocations are needed for variants of these // instructions, so it turns out that we need a different operand for all of // them. 
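Relocations aside, the move-wide semantics themselves are simple; a hedged sketch (illustrative C++ only):

#include <cassert>
#include <cstdint>

// MOVZ writes one 16-bit chunk and zeroes the rest; MOVK patches one chunk
// and keeps the others, which is how larger constants are built up.
uint64_t movz(unsigned imm16, unsigned shift) {
  return (uint64_t)imm16 << shift;
}
uint64_t movk(uint64_t old, unsigned imm16, unsigned shift) {
  uint64_t mask = (uint64_t)0xFFFF << shift;
  return (old & ~mask) | ((uint64_t)imm16 << shift);
}

int main() {
  // movz x0, #0x1234, lsl #16 ; movk x0, #0x5678
  assert(movk(movz(0x1234, 16), 0x5678, 0) == 0x12345678);
}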
-multiclass movw_operands -{ - def _imm_asmoperand : AsmOperandClass - { +multiclass movw_operands { + def _imm_asmoperand : AsmOperandClass { let Name = instname # width # "Shifted" # shift; let PredicateMethod = "is" # instname # width # "Imm"; let RenderMethod = "addMoveWideImmOperands"; @@ -3986,8 +3802,7 @@ multiclass movw_operands let ParserMethod = "ParseImmWithLSLOperand"; } - def _imm : Operand - { + def _imm : Operand { let ParserMatchClass = !cast(prefix # "_imm_asmoperand"); let PrintMethod = "printMoveWideImmOperand"; let EncoderMethod = "getMoveWideImmOpValue"; @@ -4004,13 +3819,12 @@ defm movz64 : movw_operands<"movz64", "MOVZ", 64>; defm movk32 : movw_operands<"movk32", "MOVK", 32>; defm movk64 : movw_operands<"movk64", "MOVK", 64>; -multiclass A64I_movwSizes opc, string asmop, dag ins32bit, dag ins64bit> -{ +multiclass A64I_movwSizes opc, string asmop, dag ins32bit, + dag ins64bit> { def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary> - { + [], NoItinerary> { bits<18> FullImm; let UImm16 = FullImm{15-0}; let Shift = FullImm{17-16}; @@ -4018,8 +3832,7 @@ multiclass A64I_movwSizes opc, string asmop, dag ins32bit, dag ins64bit> def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary> - { + [], NoItinerary> { bits<18> FullImm; let UImm16 = FullImm{15-0}; let Shift = FullImm{17-16}; @@ -4027,8 +3840,7 @@ multiclass A64I_movwSizes opc, string asmop, dag ins32bit, dag ins64bit> } let isMoveImm = 1, isReMaterializable = 1, - isAsCheapAsAMove = 1, neverHasSideEffects = 1 in -{ + isAsCheapAsAMove = 1, neverHasSideEffects = 1 in { defm MOVN : A64I_movwSizes<0b00, "movn", (ins movn32_imm:$FullImm), (ins movn64_imm:$FullImm)>; @@ -4051,10 +3863,8 @@ defm MOVK : A64I_movwSizes<0b11, "movk", // And now the "MOV" aliases. These also need their own operands because what // they accept is completely different to what the base instructions accept. multiclass movalias_operand -{ - def _asmoperand : AsmOperandClass - { + string immpredicate, int width> { + def _asmoperand : AsmOperandClass { let Name = basename # width # "MovAlias"; let PredicateMethod = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; @@ -4063,8 +3873,7 @@ multiclass movalias_operand"; } - def _movimm : Operand - { + def _movimm : Operand { let ParserMatchClass = !cast(prefix # "_asmoperand"); let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); @@ -4102,14 +3911,12 @@ def adr_label : Operand { let OperandType = "OPERAND_PCREL"; } -def adrp_label_asmoperand : AsmOperandClass -{ +def adrp_label_asmoperand : AsmOperandClass { let Name = "AdrpLabel"; let RenderMethod = "addLabelOperands<21, 4096>"; } -def adrp_label : Operand -{ +def adrp_label : Operand { let EncoderMethod = "getAdrpLabelOpValue"; // This label is a 21-bit offset from PC, scaled by the page-size: 4096. @@ -4118,8 +3925,7 @@ def adrp_label : Operand let OperandType = "OPERAND_PCREL"; } -let neverHasSideEffects = 1 in -{ +let neverHasSideEffects = 1 in { def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), "adr\t$Rd, $Label", [], NoItinerary>; @@ -4134,28 +3940,24 @@ let neverHasSideEffects = 1 in // + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL // Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. 
-def uimm3_asmoperand : AsmOperandClass -{ +def uimm3_asmoperand : AsmOperandClass { let Name = "UImm3"; let PredicateMethod = "isUImm<3>"; let RenderMethod = "addImmOperands"; } -def uimm3 : Operand -{ +def uimm3 : Operand { let ParserMatchClass = uimm3_asmoperand; } // The HINT alias can accept a simple unsigned 7-bit immediate. -def uimm7_asmoperand : AsmOperandClass -{ +def uimm7_asmoperand : AsmOperandClass { let Name = "UImm7"; let PredicateMethod = "isUImm<7>"; let RenderMethod = "addImmOperands"; } -def uimm7 : Operand -{ +def uimm7 : Operand { let ParserMatchClass = uimm7_asmoperand; } @@ -4174,8 +3976,8 @@ defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; // * There are ~1000 generic names S3____ which have an // implementation-defined effect // * Most registers are shared, but some are read-only or write-only. -// * There is a variant of MSR which accepts the same register name (SPSel), but -// which would have a different encoding. +// * There is a variant of MSR which accepts the same register name (SPSel), +// but which would have a different encoding. // In principle these could be resolved in with more complicated subclasses of // NamedImmMapper, however that imposes an overhead on other "named @@ -4185,21 +3987,18 @@ defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; // The solution adopted here is to take the MRS/MSR Mappers out of the usual // hierarchy (they're not derived from NamedImmMapper) and to add logic for // their special situation. -def mrs_asmoperand : AsmOperandClass -{ +def mrs_asmoperand : AsmOperandClass { let Name = "MRS"; let ParserMethod = "ParseSysRegOperand"; } -def mrs_op : Operand -{ +def mrs_op : Operand { let ParserMatchClass = mrs_asmoperand; let PrintMethod = "printMRSOperand"; let DecoderMethod = "DecodeMRSOperand"; } -def msr_asmoperand : AsmOperandClass -{ +def msr_asmoperand : AsmOperandClass { let Name = "MSRWithReg"; // Note that SPSel is valid for both this and the pstate operands, but with @@ -4209,22 +4008,19 @@ def msr_asmoperand : AsmOperandClass let ParserMethod = "ParseSysRegOperand"; } -def msr_op : Operand -{ +def msr_op : Operand { let ParserMatchClass = msr_asmoperand; let PrintMethod = "printMSROperand"; let DecoderMethod = "DecodeMSROperand"; } -def pstate_asmoperand : AsmOperandClass -{ +def pstate_asmoperand : AsmOperandClass { let Name = "MSRPState"; // See comment above about parser. let ParserMethod = "ParseSysRegOperand"; } -def pstate_op : Operand -{ +def pstate_op : Operand { let ParserMatchClass = pstate_asmoperand; let PrintMethod = "printNamedImmOperand"; let DecoderMethod = "DecodeNamedImmOperand"; @@ -4232,16 +4028,14 @@ def pstate_op : Operand // When is specified, an assembler should accept something like "C4", not // the usual "#4" immediate. -def CRx_asmoperand : AsmOperandClass -{ +def CRx_asmoperand : AsmOperandClass { let Name = "CRx"; let PredicateMethod = "isUImm<4>"; let RenderMethod = "addImmOperands"; let ParserMethod = "ParseCRxOperand"; } -def CRx : Operand -{ +def CRx : Operand { let ParserMatchClass = CRx_asmoperand; let PrintMethod = "printCRxOperand"; } @@ -4251,8 +4045,7 @@ def CRx : Operand // HINT is straightforward, with a few aliases. 
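The HINT definition below splits its 7-bit immediate across the CRm and Op2 fields; numerically (illustrative C++ only):

unsigned hintCRm(unsigned uimm7) { return (uimm7 >> 3) & 0xF; } // UImm7{6-3}
unsigned hintOp2(unsigned uimm7) { return uimm7 & 0x7; }        // UImm7{2-0}
// e.g. "sevl" is hint #5: CRm = 0, Op2 = 5, matching the aliases below.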
def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7", - [], NoItinerary> -{ + [], NoItinerary> { bits<7> UImm7; let CRm = UImm7{6-3}; let Op2 = UImm7{2-0}; @@ -4275,8 +4068,7 @@ def : InstAlias<"sevl", (HINTi 5)>; class simple_sys op0, bits<3> op1, bits<4> crn, bits<3> op2, Operand operand, string asmop> : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"), - [], NoItinerary> -{ + [], NoItinerary> { let Op0 = op0; let Op1 = op1; let CRn = crn; @@ -4303,8 +4095,7 @@ def SYSiccix : A64I_system<0b0, (outs), (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, GPR64:$Rt), "sys\t$Op1, $CRn, $CRm, $Op2, $Rt", - [], NoItinerary> -{ + [], NoItinerary> { let Op0 = 0b01; } @@ -4316,8 +4107,7 @@ def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2", // But many have aliases, which obviously don't fit into class SYSalias - : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> -{ + : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> { let isAsmParserOnly = 1; bits<14> SysOp; @@ -4330,8 +4120,7 @@ class SYSalias def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">; -def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> -{ +def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> { let Rt = 0b11111; } @@ -4340,8 +4129,7 @@ def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">; def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">; -def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> -{ +def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> { let Rt = 0b11111; } @@ -4349,15 +4137,13 @@ def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt), (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2), "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2", - [], NoItinerary> -{ + [], NoItinerary> { let Op0 = 0b01; } // The instructions themselves are rather simple for MSR and MRS. def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), - "msr\t$SysReg, $Rt", [], NoItinerary> -{ + "msr\t$SysReg, $Rt", [], NoItinerary> { bits<16> SysReg; let Op0 = SysReg{15-14}; let Op1 = SysReg{13-11}; @@ -4367,8 +4153,7 @@ def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), } def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), - "mrs\t$Rt, $SysReg", [], NoItinerary> -{ + "mrs\t$Rt, $SysReg", [], NoItinerary> { bits<16> SysReg; let Op0 = SysReg{15-14}; let Op1 = SysReg{13-11}; @@ -4378,8 +4163,7 @@ def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), } def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), - "msr\t$PState, $CRm", [], NoItinerary> -{ + "msr\t$PState, $CRm", [], NoItinerary> { bits<6> PState; let Op0 = 0b00; @@ -4396,15 +4180,13 @@ def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), // The bit to test is a simple unsigned 6-bit immediate in the X-register // versions. -def uimm6 : Operand -{ +def uimm6 : Operand { let ParserMatchClass = uimm6_asmoperand; } def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; -def tbimm_target : Operand -{ +def tbimm_target : Operand { let EncoderMethod = "getLabelOpValue"; // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. 
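Scaling by the instruction width means the 14-bit TBZ/TBNZ label field covers roughly +/-32KiB; a hedged range check (illustrative C++ only):

#include <cstdint>

// A byte displacement is encodable iff it is 4-byte aligned and its word
// count fits in a signed 14-bit field.
bool fitsTestBranchLabel(int64_t byteDisp) {
  if (byteDisp % 4 != 0)
    return false;
  int64_t words = byteDisp / 4;
  return words >= -8192 && words <= 8191; // signed 14 bits
}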
@@ -4422,8 +4204,7 @@ def A64ne : ImmLeaf; def tstb64_pat : ComplexPattern">; def tstb32_pat : ComplexPattern">; -let isBranch = 1, isTerminator = 1 in -{ +let isBranch = 1, isTerminator = 1 in { def TBZxii : A64I_TBimm<0b0, (outs), (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), "tbz\t$Rt, $Imm, $Label", @@ -4448,8 +4229,7 @@ let isBranch = 1, isTerminator = 1 in "tbz\t$Rt, $Imm, $Label", [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), A64eq, bb:$Label)], - NoItinerary> - { + NoItinerary> { let Imm{5} = 0b0; } @@ -4458,8 +4238,7 @@ let isBranch = 1, isTerminator = 1 in "tbnz\t$Rt, $Imm, $Label", [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), A64ne, bb:$Label)], - NoItinerary> - { + NoItinerary> { let Imm{5} = 0b0; } } @@ -4471,8 +4250,7 @@ let isBranch = 1, isTerminator = 1 in def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; -def bimm_target : Operand -{ +def bimm_target : Operand { let EncoderMethod = "getLabelOpValue"; // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. @@ -4482,8 +4260,7 @@ def bimm_target : Operand let OperandType = "OPERAND_PCREL"; } -def blimm_target : Operand -{ +def blimm_target : Operand { let EncoderMethod = "getLabelOpValue"; // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. @@ -4499,15 +4276,13 @@ class A64I_BimmImpl patterns, Operand lbl_type> NoItinerary>; let isBranch = 1 in { - def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> - { + def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> { let isTerminator = 1; let isBarrier = 1; } def BLimm : A64I_BimmImpl<0b1, "bl", - [(AArch64Call tglobaladdr:$Label)], blimm_target> - { + [(AArch64Call tglobaladdr:$Label)], blimm_target> { let isCall = 1; let Defs = [X30]; } @@ -4526,8 +4301,7 @@ class A64I_BregImpl opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin = NoItinerary> : A64I_Breg -{ + outs, ins, asmstr, patterns, itin> { let isBranch = 1; let isIndirectBranch = 1; } @@ -4538,23 +4312,20 @@ class A64I_BregImpl opc, let isBranch = 1 in { def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), - "br\t$Rn", [(brind GPR64:$Rn)]> - { + "br\t$Rn", [(brind GPR64:$Rn)]> { let isBarrier = 1; let isTerminator = 1; } def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), - "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> - { + "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> { let isBarrier = 0; let isCall = 1; let Defs = [X30]; } def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), - "ret\t$Rn", []> - { + "ret\t$Rn", []> { let isBarrier = 1; let isTerminator = 1; let isReturn = 1; @@ -4563,23 +4334,20 @@ let isBranch = 1 in { // Create a separate pseudo-instruction for codegen to use so that we don't // flag x30 as used in every function. It'll be restored before the RET by the // epilogue if it's legitimately used. 
- def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> - { + def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> { let isTerminator = 1; let isBarrier = 1; let isReturn = 1; } - def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> - { + def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> { let Rn = 0b11111; let isBarrier = 1; let isTerminator = 1; let isReturn = 1; } - def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> - { + def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> { let Rn = 0b11111; let isBarrier = 1; } @@ -4632,8 +4400,7 @@ def : GOTLoadSmall; // Tail call handling //===----------------------------------------------------------------------===// -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in -{ +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in { def TC_RETURNdi : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; @@ -4644,8 +4411,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - Uses = [XSP] in -{ + Uses = [XSP] in { def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], (Bimm bimm_target:$Label)>; @@ -4668,14 +4434,12 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), def : Pat<(A64threadpointer), (MRSxi 0xde82)>; -def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> -{ +def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> { let hasSideEffects = 1; } def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), - [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> -{ + [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> { let isCall = 1; let Defs = [X30]; } @@ -4737,8 +4501,7 @@ def cpinst_operand : Operand; def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), []> -{ + i32imm:$size), []> { let neverHasSideEffects = 1; let isNotDuplicable = 1; } @@ -4761,8 +4524,7 @@ def : Pat<(i64 (anyext (i32 GPR32:$val))), def F128CSEL : PseudoInst<(outs FPR128:$Rd), (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn), - FPR128:$Rm))]> -{ + FPR128:$Rm))]> { let Uses = [NZCV]; let usesCustomInserter = 1; } @@ -4798,8 +4560,7 @@ def F128CSEL : PseudoInst<(outs FPR128:$Rd), //===------------------------------ // First, some simple classes for !foreach and !subst to use: -class Decls -{ +class Decls { dag pattern; } @@ -4876,8 +4637,7 @@ def atomic_store_simple_i64 : simple_store; // quick multiclass here allows reuse. 
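The "simple" qualifier above restricts these patterns to orderings that need no barriers or exclusive sequences; with C++11 atomics, relaxed operations are the canonical case (illustrative only, not LLVM code):

#include <atomic>
#include <cstdint>

uint64_t simpleAtomicLoad(const std::atomic<uint64_t> &a) {
  return a.load(std::memory_order_relaxed); // can lower to a bare LDR
}
void simpleAtomicStore(std::atomic<uint64_t> &a, uint64_t v) {
  a.store(v, std::memory_order_relaxed);    // can lower to a bare STR
}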
multiclass ls_atomic_pats -{ + ValueType sty> { def : Pat<(!cast("atomic_load_simple_" # sty) address), (LOAD Base, Offset)>; @@ -4894,8 +4654,7 @@ multiclass ls_atomic_pats - : ls_atomic_pats -{ + : ls_atomic_pats { def : Pat<(!cast(zextload # sty) address), (LOAD Base, Offset)>; def : Pat<(!cast(extload # sty) address), (LOAD Base, Offset)>; @@ -4919,8 +4678,7 @@ multiclass ls_small_pats -{ + dag address, ValueType sty> { def : Pat<(i32 (!cast("sextload" # sty) address)), (!cast("LDRS" # T # "w" # U) Base, Offset)>; @@ -4932,8 +4690,7 @@ multiclass load_signed_pats -{ + ValueType sty> { def : Pat<(sty (load address)), (LOAD Base, Offset)>; def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>; } @@ -4949,8 +4706,7 @@ multiclass ls_int_neutral_pats -{ +multiclass uimm12_pats { defm : ls_small_pats !subst(OFFSET, word_uimm12, !subst(ALIGN, min_align4, decls.pattern)))), (LDRSWx Base, !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)))>; + !subst(OFFSET, word_uimm12, decls.pattern)))>; } // Straightforward patterns of last resort: a pointer with or without an @@ -5059,11 +4815,13 @@ defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12), defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN), (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>; -defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, ALIGN), +defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, + ALIGN), (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>; // External symbols that make it this far should also get standard relocations. -defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, ALIGN), +defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, + ALIGN), (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; @@ -5078,8 +4836,7 @@ defm : uimm12_pats<(i64 frameindex:$Rn), // These can be much simpler than uimm12 because we don't to change the operand // type (e.g. LDURB and LDURH take the same operands). -multiclass simm9_pats -{ +multiclass simm9_pats { defm : ls_small_pats; defm : ls_small_pats; @@ -5123,8 +4880,7 @@ defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9), // quick multiclass here allows reuse. 
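The register-offset forms handled next compute their address from an extended, optionally shifted index; sketched in C++ (illustrative only; the shift is 0 or log2 of the access size, as the imm_eq2/imm_eq3 patterns below suggest):

#include <cstdint>

// [xN, wM, sxtw #s] / [xN, wM, uxtw #s]: sign- or zero-extend the 32-bit
// index, shift it left by s, and add it to the 64-bit base.
uint64_t regOffsetAddr(uint64_t base, uint32_t wm, bool isSigned,
                       unsigned shift) {
  uint64_t idx = isSigned ? (uint64_t)(int64_t)(int32_t)wm : (uint64_t)wm;
  return base + (idx << shift);
}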
multiclass ro_atomic_pats -{ + RegisterClass TPR, ValueType sty> { def : Pat<(!cast("atomic_load_simple_" # sty) address), (LOAD Base, Offset, Extend)>; @@ -5140,8 +4896,7 @@ multiclass ro_atomic_pats - : ro_atomic_pats -{ + : ro_atomic_pats { def : Pat<(!cast(zextload # sty) address), (LOAD Base, Offset, Extend)>; @@ -5168,8 +4923,7 @@ multiclass ro_small_pats -{ + dag address, ValueType sty> { def : Pat<(i32 (!cast("sextload" # sty) address)), (!cast("LDRS" # T # "w_" # Rm # "_RegOffset") Base, Offset, Extend)>; @@ -5182,21 +4936,20 @@ multiclass ro_signed_pats -{ + RegisterClass TPR, ValueType sty> { def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset, Extend)>; } multiclass ro_int_neutral_pats + dag Base, dag Offset, dag Extend, dag address, + RegisterClass TPR, ValueType sty> : ro_neutral_pats, ro_atomic_pats; -multiclass regoff_pats -{ +multiclass regoff_pats { defm : ro_small_pats("LS8_" # Rm # "_RegOffset_LDR"), !cast("LS8_" # Rm # "_RegOffset_STR"), Base, Offset, Extend, @@ -5216,19 +4969,21 @@ multiclass regoff_pats !subst(SHIFT, imm_eq2, decls.pattern)), i32>; - defm : ro_int_neutral_pats("LS32_" # Rm # "_RegOffset_LDR"), - !cast("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - GPR32, i32>; - - defm : ro_int_neutral_pats("LS64_" # Rm # "_RegOffset_LDR"), - !cast("LS64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - GPR64, i64>; + defm : ro_int_neutral_pats< + !cast("LS32_" # Rm # "_RegOffset_LDR"), + !cast("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + GPR32, i32>; + + defm : ro_int_neutral_pats< + !cast("LS64_" # Rm # "_RegOffset_LDR"), + !cast("LS64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + GPR64, i64>; defm : ro_neutral_pats("LSFP16_" # Rm # "_RegOffset_LDR"), !cast("LSFP16_" # Rm # "_RegOffset_STR"), diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index bf5cadf..37c1cc5 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -121,7 +121,9 @@ public: void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } - void setArgumentStackToRestore(unsigned bytes) { ArgumentStackToRestore = bytes; } + void setArgumentStackToRestore(unsigned bytes) { + ArgumentStackToRestore = bytes; + } unsigned getInitialStackAdjust() const { return InitialStackAdjust; } void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 2481176..da45685 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the AArch64 implementation of the TargetRegisterInfo class. +// This file contains the AArch64 implementation of the TargetRegisterInfo +// class. 
// //===----------------------------------------------------------------------===// @@ -87,7 +88,7 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64FrameLowering *TFI = - static_cast(MF.getTarget().getFrameLowering()); + static_cast(MF.getTarget().getFrameLowering()); // In order to work out the base and offset for addressing, the FrameLowering // code needs to know (sometimes) whether the instruction is storing/loading a @@ -202,6 +203,7 @@ AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { bool AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const AArch64FrameLowering *AFI = static_cast(TFI); + const AArch64FrameLowering *AFI + = static_cast(TFI); return AFI->useFPForAddressing(MF); } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index f1f7fd1..3cbbf14 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -193,13 +193,11 @@ def VPR128 : RegisterClass<"AArch64", (sequence "V%u", 0, 31)>; // Flags register -def NZCV : Register<"nzcv"> -{ +def NZCV : Register<"nzcv"> { let Namespace = "AArch64"; } -def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> -{ +def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { let CopyCost = -1; let isAllocatable = 0; } diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 8d3889e..d412ed2 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -1,4 +1,4 @@ -//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info -----*- C++ -*-===// +//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 3bb961a..d5c3e89 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info ------------------===// +//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index bc0f396..e15d135 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -53,7 +53,7 @@ public: // These are the public interface of the MCTargetAsmParser bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands); @@ -116,7 +116,7 @@ public: ParseSysRegOperand(SmallVectorImpl &Operands); bool validateInstruction(MCInst &Inst, - const SmallVectorImpl &Operands); + const SmallVectorImpl &Operands); /// Scan the next token (which had better be an identifier) and determine /// whether it represents a general-purpose or vector register. 
It returns @@ -1674,7 +1674,8 @@ AArch64AsmParser::ParseShiftExtend( if (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::EndOfStatement) || Parser.getTok().is(AsmToken::RBrac)) { - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, S, E)); + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, + S, E)); return MatchOperand_Success; } } @@ -1697,7 +1698,8 @@ AArch64AsmParser::ParseShiftExtend( Parser.Lex(); E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, S, E)); + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, + S, E)); return MatchOperand_Success; } @@ -1942,7 +1944,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { default: break; diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 017b509..38d0e8e 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -77,10 +77,12 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, @@ -143,11 +145,10 @@ static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, - llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); +static DecodeStatus +DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, + llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, unsigned Val, @@ -247,7 +248,8 @@ DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -460,8 +462,10 @@ static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, } // ASR and LSR have more specific patterns so they won't get here: - assert(!(ImmS == 31 && !SF && Opc != BFM) && "shift should have used auto decode"); - assert(!(ImmS == 63 && SF && Opc != BFM) && "shift should have used auto decode"); + assert(!(ImmS == 31 && !SF && Opc != BFM) + && "shift should have used auto decode"); + assert(!(ImmS == 63 && SF && Opc != BFM) + && "shift should have used auto decode"); // Extension instructions similarly: 
if (Opc == SBFM && ImmR == 0) { diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index ec14595..639fa86 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -114,7 +114,7 @@ public: } void printShiftOperand(const char *name, const MCInst *MI, - unsigned OpIdx, raw_ostream &O); + unsigned OpIdx, raw_ostream &O); void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index a206fd1..5d5e38e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -94,73 +94,73 @@ public: // This table *must* be in the order that the fixup_* kinds are defined in // AArch64FixupKinds.h. // -// Name Offset (bits) Size (bits) Flags - { "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_add_lo12", 0, 32, 0 }, - { "fixup_a64_ldst8_lo12", 0, 32, 0 }, - { "fixup_a64_ldst16_lo12", 0, 32, 0 }, - { "fixup_a64_ldst32_lo12", 0, 32, 0 }, - { "fixup_a64_ldst64_lo12", 0, 32, 0 }, - { "fixup_a64_ldst128_lo12", 0, 32, 0 }, - { "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_movw_uabs_g0", 0, 32, 0 }, - { "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, - { "fixup_a64_movw_uabs_g1", 0, 32, 0 }, - { "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, - { "fixup_a64_movw_uabs_g2", 0, 32, 0 }, - { "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, - { "fixup_a64_movw_uabs_g3", 0, 32, 0 }, - { "fixup_a64_movw_sabs_g0", 0, 32, 0 }, - { "fixup_a64_movw_sabs_g1", 0, 32, 0 }, - { "fixup_a64_movw_sabs_g2", 0, 32, 0 }, - { "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, - { "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, - { "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, - { "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, - { "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, - { "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, - { "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, - { "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, - { "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, - { "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, - { "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_movw_tprel_g2", 0, 32, 0 }, - { "fixup_a64_movw_tprel_g1", 0, 32, 0 }, - { "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, - { "fixup_a64_movw_tprel_g0", 0, 32, 0 }, - { "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, - { "fixup_a64_add_tprel_hi12", 0, 32, 0 }, - { 
"fixup_a64_add_tprel_lo12", 0, 32, 0 }, - { "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, - { "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, - { "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, - { "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, - { "fixup_a64_tlsdesc_call", 0, 0, 0 } +// Name Offset (bits) Size (bits) Flags +{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_add_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst128_lo12", 0, 32, 0 }, +{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_movw_uabs_g0", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g1", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g2", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g3", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g0", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g1", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g2", 0, 32, 0 }, +{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_movw_tprel_g2", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g0", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_add_tprel_hi12", 0, 32, 0 }, +{ "fixup_a64_add_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 
}, +{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_call", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index ee77da2..8d45198 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -364,7 +364,7 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, unsigned AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups) const { const MCOperand &UImm16MO = MI.getOperand(OpIdx); const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index 20adc0c..f2af204 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -83,7 +83,8 @@ public: return Create(VK_AARCH64_GOT, Expr, Ctx); } - static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, MCContext &Ctx) { + static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, + MCContext &Ctx) { return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); } diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index fa07d49..5dbdc57 100644 --- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation ---------------===// +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index cf9a638..5f09074 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -725,7 +725,8 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { } -bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) { +bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, + uint64_t &Imm) { uint32_t N = Bits >> 12; uint32_t ImmR = (Bits >> 6) & 0x3f; uint32_t ImmS = Bits & 0x3f; -- cgit v1.1 From e2d5590c33f1b5203c0104c1c82bf8e0f28b828e Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 5 Feb 2013 14:05:55 +0000 Subject: ARM cost model: Cost for scalar integer casts and floating point conversions Also adds some costs for vector integer float conversions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 115 ++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2ded63f..bf83d51 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,25 +177,126 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); // Some arithmetic, load and store operations have specific instructions - // to cast up/down their types automatically at no extra cost - // TODO: Get these tables to know at least what the related operations are - static const TypeConversionCostTblEntry NEONConversionTbl[] = { + // to cast up/down their types automatically at no extra cost. + // TODO: Get these tables to know at least what the related operations are. + static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + + // Vector float <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + + // Vector double <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } }; - if (ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONConversionTbl, - array_lengthof(NEONConversionTbl), + if (SrcTy.isVector() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, + array_lengthof(NEONVectorConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) - return NEONConversionTbl[Idx].Cost; + return NEONVectorConversionTbl[Idx].Cost; + } + + // Scalar float to integer conversions. 
+ static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = { + { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } + }; + if (SrcTy.isFloatingPoint() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, + array_lengthof(NEONFloatConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONFloatConversionTbl[Idx].Cost; + } + + + // Scalar integer to float conversions. + static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = { + { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } + }; + + if (SrcTy.isInteger() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, + array_lengthof(NEONIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONIntegerConversionTbl[Idx].Cost; } + // Scalar integer conversion costs. + static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { + // i16 -> i64 requires two dependent operations. + { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, + + // Truncates on i64 are assumed to be free. + { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } + }; + + if (SrcTy.isInteger()) { + int Idx = + ConvertCostTableLookup(ARMIntegerConversionTbl, + array_lengthof(ARMIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return ARMIntegerConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } -- cgit v1.1 From b0c899666a6c5397cf35fffd0f8d93749cb2cb38 Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Tue, 5 Feb 2013 14:18:59 +0000 Subject: Link .ARM.exidx with corresponding text section. 
The sh_link in the ELF section header of .ARM.exidx should be filled with the section index of the corresponding text section. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174372 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 8805773..d65f227 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1332,6 +1332,24 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, break; } + if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && + Section.getType() == ELF::SHT_ARM_EXIDX) { + StringRef SecName(Section.getSectionName()); + if (SecName == ".ARM.exidx") { + sh_link = SectionIndexMap.lookup( + Asm.getContext().getELFSection(".text", + ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, + SectionKind::getText())); + } else if (SecName.startswith(".ARM.exidx")) { + sh_link = SectionIndexMap.lookup( + Asm.getContext().getELFSection(SecName.substr(sizeof(".ARM.exidx") - 1), + ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, + SectionKind::getText())); + } + } + WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(), Section.getFlags(), 0, Offset, Size, sh_link, sh_info, Alignment, Section.getEntrySize()); -- cgit v1.1 From eb260b252749470654ef0655aa1d1334a4c0d983 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 5 Feb 2013 14:44:16 +0000 Subject: Revert r174343, "When the target-independent DAGCombiner inferred a higher alignment for a load," It caused hangups in compiling clang/lib/Parse/ParseDecl.cpp and clang/lib/Driver/Tools.cpp in stage2 on some hosts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174374 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 39a8e82..79ec227 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7199,15 +7199,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment()) { - SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + if (Align > LD->getAlignment()) + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); - } } } -- cgit v1.1 From b9e1a33941d25faf54dc3ddec4be7f8f0750a155 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 5 Feb 2013 15:01:51 +0000 Subject: Fix remaining StringRef abuse. This should fix the valgrind buildbot failure. 
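
The bug class here is a non-owning StringRef left pointing into a std::string whose lifetime ends too early. The same hazard can be sketched with std::string_view as a stand-in for llvm::StringRef; the function names and suffix logic below are invented for illustration and are not taken from the parser.

#include <iostream>
#include <string>
#include <string_view>

// BUGGY shape: the returned view points into `lower`, a local buffer
// destroyed on return, so the caller reads freed memory (the kind of
// error valgrind flags). Never call this.
std::string_view layoutSuffixBad(const std::string &reg) {
  std::string lower = reg;  // imagine lowercasing applied here
  return std::string_view(lower).substr(lower.find('.'));
}

// FIXED shape, mirroring the StringSwitch change in the diff below:
// map the suffix onto string literals, which have static storage
// duration and therefore outlive the call.
std::string_view layoutSuffixGood(const std::string &reg) {
  std::string_view suffix =
      std::string_view(reg).substr(reg.find('.'));  // assumes a '.' exists
  if (suffix == ".2d") return ".2d";
  if (suffix == ".4s") return ".4s";
  if (suffix == ".8h") return ".8h";
  return "";
}

int main() {
  std::cout << layoutSuffixGood("v0.2d") << "\n";  // prints ".2d"
}
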
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174375 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index e15d135..69e4fc2 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1442,8 +1442,8 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNo, SMLoc &RegEndLoc,
   // gives us a permanent string to use in the token (a pointer into LowerReg
   // would go out of scope when we return).
   LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
-  Layout = LowerReg.substr(DotPos, StringRef::npos);
-  Layout = StringSwitch(Layout)
+  std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+  Layout = StringSwitch(LayoutText)
     .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
     .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
     .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
--
cgit v1.1


From 935645b7655a0b5189d40b3d65b3bcb14e30d859 Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Tue, 5 Feb 2013 15:08:02 +0000
Subject: Loop Vectorizer: Handle pointer stores/loads in getWidestType()

In the loop vectorizer cost model, we used to ignore stores/loads of a
pointer type when computing the widest type within a loop. This meant that
if we had only stores/loads of pointers in a loop we would return a widest
type of 8 bits (instead of 32 or 64 bits) and therefore a vector factor
that was too big.

Now, if we see a consecutive store/load of pointers we use the size of a
pointer (from data layout).

This problem occurred in SingleSource/Benchmarks/Shootout-C++/hash.cpp
(reduced test case is the first test in vector_ptr_load_store.ll).

radar://13139343

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174377 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 40 +++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1b242c9..6254273 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -518,8 +518,9 @@ class LoopVectorizationCostModel {
 public:
   LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
                              LoopVectorizationLegality *Legal,
-                             const TargetTransformInfo &TTI)
-    : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {}
+                             const TargetTransformInfo &TTI,
+                             DataLayout *DL)
+    : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {}

   /// Information about vectorization costs
   struct VectorizationFactor {
@@ -575,6 +576,10 @@ private:
   /// the scalar type.
   static Type* ToVectorTy(Type *Scalar, unsigned VF);

+  /// Returns whether the instruction is a load or store and will be a emitted
+  /// as a vector operation.
+  bool isConsecutiveLoadOrStore(Instruction *I);
+
   /// The loop that we evaluate.
   Loop *TheLoop;
   /// Scev analysis.
@@ -585,6 +590,8 @@ private:
   LoopVectorizationLegality *Legal;
   /// Vector target information.
   const TargetTransformInfo &TTI;
+  /// Target data layout information.
+  DataLayout *DL;
 };

 /// The LoopVectorize Pass.
@@ -624,7 +631,7 @@ struct LoopVectorize : public LoopPass {
     }

     // Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); // Check the function attribues to find out if this function should be // optimized for size. @@ -2786,14 +2793,17 @@ unsigned LoopVectorizationCostModel::getWidestType() { continue; // Examine the stored values. - if (StoreInst *ST = dyn_cast(it)) + StoreInst *ST = 0; + if ((ST = dyn_cast(it))) T = ST->getValueOperand()->getType(); - // Ignore stored/loaded pointer types. - if (T->isPointerTy()) - continue; - - MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); + // Ignore loaded pointer types and stored pointer types that are not + // consecutive. However, we do want to take consecutive stores/loads of + // pointer vectors into account. + if (T->isPointerTy() && isConsecutiveLoadOrStore(it)) + MaxWidth = std::max(MaxWidth, DL->getPointerSizeInBits()); + else + MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); } } @@ -3241,4 +3251,16 @@ namespace llvm { } } +bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { + // Check for a store. + StoreInst *ST = dyn_cast(Inst); + if (ST) + return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; + // Check for a load. + LoadInst *LI = dyn_cast(Inst); + if (LI) + return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; + + return false; +} -- cgit v1.1 From a693205ce1d5a57ae4d855d5773f5e1c89eff063 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 5 Feb 2013 16:40:06 +0000 Subject: Fix signed-unsigned comparison warning. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174387 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2158b05..652f129 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2389,8 +2389,10 @@ static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, if (!AtomicNode) return SDValue(); - uint64_t FenceOrder = FenceNode->getConstantOperandVal(1); - uint64_t FenceScope = FenceNode->getConstantOperandVal(2); + AtomicOrdering FenceOrder + = static_cast(FenceNode->getConstantOperandVal(1)); + SynchronizationScope FenceScope + = static_cast(FenceNode->getConstantOperandVal(2)); if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) return SDValue(); @@ -2409,7 +2411,7 @@ static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, Chain, // Chain AtomicOp.getOperand(1), // Pointer AtomicNode->getMemOperand(), Acquire, - static_cast(FenceScope)); + FenceScope); if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD) DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode()); @@ -2428,10 +2430,10 @@ static SDValue PerformATOMIC_STORECombine(SDNode *N, if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE) return SDValue(); - uint64_t FenceOrder - = cast(FenceOp.getOperand(1))->getZExtValue(); - uint64_t FenceScope - = cast(FenceOp.getOperand(2))->getZExtValue(); + AtomicOrdering FenceOrder + = static_cast(FenceOp->getConstantOperandVal(1)); + SynchronizationScope FenceScope + = static_cast(FenceOp->getConstantOperandVal(2)); if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope()) return SDValue(); @@ -2442,7 +2444,7 @@ static SDValue PerformATOMIC_STORECombine(SDNode *N, AtomicNode->getOperand(1), // Pointer AtomicNode->getOperand(2), // Value AtomicNode->getMemOperand(), 
Release, - static_cast(FenceScope)); + FenceScope); } /// For a true bitfield insert, the bits getting into that contiguous mask -- cgit v1.1 From 4210da7253e04f7bf3267cbfb2b80f9116e408fa Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 5 Feb 2013 16:42:24 +0000 Subject: Hexagon: Add V4 compare instructions. Enable relationship mapping for the existing instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174389 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfoV4.td | 159 +++++++++++++++++++++++++++---- 1 file changed, 143 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 08225e4..f7b6a9d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -251,6 +251,54 @@ def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst), []>, Requires<[HasV4T]>; +// Rd=cmp.eq(Rs,#s8) +let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2, +isExtentSigned = 1, opExtentBits = 8 in +def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = cmp.eq($Rs, #$s8)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (seteq (i32 IntRegs:$Rs), + s8ExtPred:$s8)))))]>, + Requires<[HasV4T]>; + +// Preserve the TSTBIT generation +def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), + (i32 IntRegs:$src1))), 0)))), + (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + 1, 0))>; + +// Interfered with tstbit generation, above pattern preserves, see : tstbit.ll +// Rd=cmp.ne(Rs,#s8) +let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2, +isExtentSigned = 1, opExtentBits = 8 in +def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = !cmp.eq($Rs, #$s8)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (setne (i32 IntRegs:$Rs), + s8ExtPred:$s8)))))]>, + Requires<[HasV4T]>; + +// Rd=cmp.eq(Rs,Rt) +let validSubTargets = HasV4SubT in +def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = cmp.eq($Rs, $Rt)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (seteq (i32 IntRegs:$Rs), + IntRegs:$Rt)))))]>, + Requires<[HasV4T]>; + +// Rd=cmp.ne(Rs,Rt) +let validSubTargets = HasV4SubT in +def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = !cmp.eq($Rs, $Rt)", + [(set (i32 IntRegs:$Rd), + (i32 (zext (i1 (setne (i32 IntRegs:$Rs), + IntRegs:$Rt)))))]>, + Requires<[HasV4T]>; //===----------------------------------------------------------------------===// // ALU32 - @@ -3656,7 +3704,61 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), // incorrect code for negative numbers. 
// Pd=cmpb.eq(Rs,#u8) -let isCompare = 1 in +// p=!cmp.eq(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// p=!cmp.eq(r1,#s10) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = !cmp.eq($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// p=!cmp.gt(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.gt($src1, $src2)", + [(set (i1 PredRegs:$dst), + (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, + Requires<[HasV4T]>; + +// p=!cmp.gt(r1,#s10) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = !cmp.gt($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +// p=!cmp.gtu(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.gtu($src1, $src2)", + [(set (i1 PredRegs:$dst), + (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, + Requires<[HasV4T]>; + +// p=!cmp.gtu(r1,#u9) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, u9Ext:$src2), + "$dst = !cmp.gtu($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), "$dst = cmpb.eq($src1, #$src2)", @@ -3664,6 +3766,12 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>, Requires<[HasV4T]>; +def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), + bb:$offset), + (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), + bb:$offset)>, + Requires<[HasV4T]>; + // Pd=cmpb.eq(Rs,Rt) let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), @@ -3705,20 +3813,21 @@ def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>, ImmRegRel; // Pd=cmpb.gtu(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", +InputType = "reg" in def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.gtu($src1, $src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), (and (i32 IntRegs:$src2), 255)))]>, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Following instruction is not being extended as it results into the incorrect // code for negative numbers. // Signed half compare(.eq) ri. 
// Pd=cmph.eq(Rs,#s8) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), "$dst = cmph.eq($src1, #$src2)", @@ -3732,7 +3841,7 @@ def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), // r0=and(r0,#0xffff) // p0=cmp.eq(r0,#0) // Pd=cmph.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.eq($src1, $src2)", @@ -3747,7 +3856,7 @@ def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), // r1=asl(r1,16) // p0=cmp.eq(r0,r1) // Pd=cmph.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.eq($src1, $src2)", @@ -3761,19 +3870,20 @@ used in the cmph.gt instruction. // Signed half compare(.gt) ri. // Pd=cmph.gt(Rs,#s8) -let isCompare = 1 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, +isCompare = 1, validSubTargets = HasV4SubT in def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s8Imm:$src2), + (ins IntRegs:$src1, s8Ext:$src2), "$dst = cmph.gt($src1, #$src2)", [(set (i1 PredRegs:$dst), (setgt (shl (i32 IntRegs:$src1), (i32 16)), - s8ImmPred:$src2))]>, + s8ExtPred:$src2))]>, Requires<[HasV4T]>; */ // Signed half compare(.gt) rr. // Pd=cmph.gt(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.gt($src1, $src2)", @@ -3784,24 +3894,41 @@ def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), // Unsigned half compare rr (.gtu). // Pd=cmph.gtu(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", +InputType = "reg" in def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.gtu($src1, $src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), (and (i32 IntRegs:$src2), 65535)))]>, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Unsigned half compare ri (.gtu). 
// Pd=cmph.gtu(Rs,#u7) -let isCompare = 1 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, +isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", +InputType = "imm" in def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Imm:$src2), + (ins IntRegs:$src1, u7Ext:$src2), "$dst = cmph.gtu($src1, #$src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), - u7ImmPred:$src2))]>, - Requires<[HasV4T]>; + u7ExtPred:$src2))]>, + Requires<[HasV4T]>, ImmRegRel; + +let validSubTargets = HasV4SubT in +def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>, + Requires<[HasV4T]>; + +let validSubTargets = HasV4SubT in +def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = !tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>, + Requires<[HasV4T]>; //===----------------------------------------------------------------------===// // XTYPE/PRED - -- cgit v1.1 From d07c2a5fa1fd3bee45104953714a128cf5e19158 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 5 Feb 2013 16:53:11 +0000 Subject: Fix comments git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174390 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CodeEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index bc77334..ece38aa 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -124,7 +124,7 @@ template } // end anonymous namespace. /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code -/// to the specified templated MachineCodeEmitter object. +/// to the specified JITCodeEmitter object. FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter(TM, JCE); -- cgit v1.1 From 50a44a62b8dc8092b0c5cf5e4f2b62cd60085527 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 5 Feb 2013 17:09:10 +0000 Subject: R600/SI: Add patterns for fcos and fsin. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes 37 piglit tests and allows e.g. FlightGear to run with radeonsi. 
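
The notable detail in the patterns this commit adds is the V_MUL_F32 by CONST.TWO_PI_INV feeding V_SIN_F32/V_COS_F32: the scaling implies the hardware units take their operand in turns (fractions of a full revolution) rather than radians, so a radian input is pre-multiplied by 1/(2*pi). A quick numeric check of that identity; hwSinTurns below is a made-up stand-in for the hardware op, under that assumption.

#include <cmath>
#include <cstdio>

// Stand-in for V_SIN_F32 assuming it takes an angle in turns.
static float hwSinTurns(float turns) { return std::sin(turns * 6.2831853f); }

int main() {
  const float TWO_PI_INV = 0.15915494f;  // 1/(2*pi), the pattern's scale
  const float rads[] = {0.5f, 1.0f, 2.5f};
  for (float rad : rads)
    std::printf("sin(%g): libm=%f  scaled-hw=%f\n",
                rad, std::sin(rad), hwSinTurns(rad * TWO_PI_INV));
}
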
Patch by: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174391 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4164c55..85bb73a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1326,6 +1326,16 @@ def : Pat< >; def : Pat < + (fcos AllReg_32:$src0), + (V_COS_F32_e32 (V_MUL_F32_e32 AllReg_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) +>; + +def : Pat < + (fsin AllReg_32:$src0), + (V_SIN_F32_e32 (V_MUL_F32_e32 AllReg_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) +>; + +def : Pat < (int_AMDGPU_cube VReg_128:$src), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), -- cgit v1.1 From 3ce2ec847885b004c768869b825be1ff9d98eca3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 5 Feb 2013 17:09:11 +0000 Subject: R600: Emit function name in the AsmPrinter Emitting the function name allows us to check for it in the FileCheck tests so we can make sure FileCheck is checking the output of the correct function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174392 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 754506c..715a378 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -47,6 +47,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { #endif } SetupMachineFunction(MF); + if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText("@" + MF.getName() + ":"); + } OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { EmitProgramInfo(MF); -- cgit v1.1 From 29b15a378045762ce09642ab9dd741ece41f59a3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 5 Feb 2013 17:09:14 +0000 Subject: R600: improve inputs/interpolation handling Use one intrinsic for all sorts of interpolation. Use two separate unexpanded instructions to represent INTERP_XY and _ZW - this will allow to eliminate one part if it's not used. Track liveness of special interpolation regs instead of reserving them - this will allow to reuse those regs, lowering reg pressure. 
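
The per-channel expansion the new INTERP_PAIR_XY/INTERP_PAIR_ZW handling performs can be modeled in a few lines: each pseudo becomes a bundle of four channel ops, the two channels the pseudo does not define are write-masked, and only the final op closes the instruction group. A rough standalone model follows; plain structs stand in for MachineInstr, and only the flag semantics are taken from the Chan loops in the diff below.

#include <cstdio>

struct ChannelOp {
  int Chan;
  bool WriteMasked;   // models MO_FLAG_MASK: the result is discarded
  bool LastInGroup;   // models the absence of MO_FLAG_NOT_LAST
};

// Pair == 0 models INTERP_PAIR_XY (defines X,Y; masks Z,W);
// Pair == 1 models INTERP_PAIR_ZW (defines Z,W; masks X,Y).
void expandInterpPair(int Pair, ChannelOp Out[4]) {
  for (int Chan = 0; Chan < 4; ++Chan) {
    bool Defined = (Pair == 0) ? (Chan < 2) : (Chan >= 2);
    Out[Chan] = ChannelOp{ Chan, !Defined, Chan == 3 };
  }
}

int main() {
  ChannelOp Ops[4];
  expandInterpPair(/*Pair=*/0, Ops);
  for (const ChannelOp &Op : Ops)
    std::printf("chan %d: %s%s\n", Op.Chan,
                Op.WriteMasked ? "masked" : "written",
                Op.LastInGroup ? " (last in group)" : "");
}
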
Patch By: Vadim Girlin v2[Vincent Lejeune]: Rebased against current llvm master Signed-off-by: Vadim Girlin Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174394 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 - lib/Target/R600/AMDGPUISelLowering.h | 2 - lib/Target/R600/R600ExpandSpecialInstrs.cpp | 199 +++++++++++----------------- lib/Target/R600/R600ISelLowering.cpp | 94 ++++--------- lib/Target/R600/R600InstrInfo.cpp | 1 - lib/Target/R600/R600Instructions.td | 42 +++--- lib/Target/R600/R600Intrinsics.td | 12 +- lib/Target/R600/R600MachineFunctionInfo.cpp | 17 +-- lib/Target/R600/R600MachineFunctionInfo.h | 7 - lib/Target/R600/R600RegisterInfo.cpp | 6 - 10 files changed, 130 insertions(+), 252 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 309bcf5..f3a047a 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -410,8 +410,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(INTERP) - NODE_NAME_CASE(INTERP_P0) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(CONST_ADDRESS) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 9938c65..0584d39 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -120,8 +120,6 @@ enum { SMIN, UMIN, URECIP, - INTERP, - INTERP_P0, EXPORT, CONST_ADDRESS, LAST_AMDGPU_ISD_NUMBER diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp index b903d4a..f8c900f 100644 --- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -55,118 +55,6 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { return new R600ExpandSpecialInstrsPass(TM); } -bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) { - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - if (MI.getOpcode() != AMDGPU::input_perspective) - return false; - - MachineBasicBlock::iterator I = &MI; - unsigned DstReg = MI.getOperand(0).getReg(); - R600MachineFunctionInfo *MFI = MI.getParent()->getParent() - ->getInfo(); - unsigned IJIndexBase; - - // In Evergreen ISA doc section 8.3.2 : - // We need to interpolate XY and ZW in two different instruction groups. - // An INTERP_* must occupy all 4 slots of an instruction group. 
- // Output of INTERP_XY is written in X,Y slots - // Output of INTERP_ZW is written in Z,W slots - // - // Thus interpolation requires the following sequences : - // - // AnyGPR.x = INTERP_ZW; (Write Masked Out) - // AnyGPR.y = INTERP_ZW; (Write Masked Out) - // DstGPR.z = INTERP_ZW; - // DstGPR.w = INTERP_ZW; (End of first IG) - // DstGPR.x = INTERP_XY; - // DstGPR.y = INTERP_XY; - // AnyGPR.z = INTERP_XY; (Write Masked Out) - // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) - // - switch (MI.getOperand(1).getImm()) { - case 0: - IJIndexBase = MFI->GetIJPerspectiveIndex(); - break; - case 1: - IJIndexBase = MFI->GetIJLinearIndex(); - break; - default: - assert(0 && "Unknow ij index"); - } - - for (unsigned i = 0; i < 8; i++) { - unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( - 2 * IJIndexBase + ((i + 1) % 2)); - unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( - MI.getOperand(2).getImm()); - - - unsigned Sel = AMDGPU::sel_x; - switch (i % 4) { - case 0:Sel = AMDGPU::sel_x;break; - case 1:Sel = AMDGPU::sel_y;break; - case 2:Sel = AMDGPU::sel_z;break; - case 3:Sel = AMDGPU::sel_w;break; - default:break; - } - - unsigned Res = TRI.getSubReg(DstReg, Sel); - - unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY; - - MachineBasicBlock &MBB = *(MI.getParent()); - MachineInstr *NewMI = - TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg); - - if (!(i> 1 && i < 6)) { - TII->addFlag(NewMI, 0, MO_FLAG_MASK); - } - - if (i % 4 != 3) - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); - } - - MI.eraseFromParent(); - - return true; -} - -bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) { - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - if (MI.getOpcode() != AMDGPU::input_constant) - return false; - - MachineBasicBlock::iterator I = &MI; - unsigned DstReg = MI.getOperand(0).getReg(); - - for (unsigned i = 0; i < 4; i++) { - unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( - MI.getOperand(1).getImm()); - - unsigned Sel = AMDGPU::sel_x; - switch (i % 4) { - case 0:Sel = AMDGPU::sel_x;break; - case 1:Sel = AMDGPU::sel_y;break; - case 2:Sel = AMDGPU::sel_z;break; - case 3:Sel = AMDGPU::sel_w;break; - default:break; - } - - unsigned Res = TRI.getSubReg(DstReg, Sel); - - MachineBasicBlock &MBB = *(MI.getParent()); - MachineInstr *NewMI = TII->buildDefaultInstruction( - MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg); - - if (i % 4 != 3) - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); - } - - MI.eraseFromParent(); - - return true; -} - bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); @@ -200,7 +88,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); continue; } - case AMDGPU::BREAK: + case AMDGPU::BREAK: { MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, AMDGPU::PRED_SETE_INT, AMDGPU::PREDICATE_BIT, @@ -214,12 +102,87 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { .addReg(AMDGPU::PREDICATE_BIT); MI.eraseFromParent(); continue; - } + } - if (ExpandInputPerspective(MI)) - continue; - if (ExpandInputConstant(MI)) - continue; + case AMDGPU::INTERP_PAIR_XY: { + MachineInstr *BMI; + unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(2).getImm()); + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + unsigned DstReg; + + if (Chan < 2) + DstReg = MI.getOperand(Chan).getReg(); + else + DstReg = Chan == 2 ? 
AMDGPU::T0_Z : AMDGPU::T0_W; + + BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY, + DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); + + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Chan >= 2) + TII->addFlag(BMI, 0, MO_FLAG_MASK); + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + continue; + } + + case AMDGPU::INTERP_PAIR_ZW: { + MachineInstr *BMI; + unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(2).getImm()); + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + unsigned DstReg; + + if (Chan < 2) + DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y; + else + DstReg = MI.getOperand(Chan-2).getReg(); + + BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW, + DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); + + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Chan < 2) + TII->addFlag(BMI, 0, MO_FLAG_MASK); + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + continue; + } + + case AMDGPU::INTERP_VEC_LOAD: { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + MachineInstr *BMI; + unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(1).getImm()); + unsigned DstReg = MI.getOperand(0).getReg(); + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0, + TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg); + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + continue; + } + } bool IsReduction = TII->isReductionOp(MI.getOpcode()); bool IsVector = TII->isVector(MI); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index ff18a44..8fe31e0 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -148,18 +148,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::RESERVE_REG: { - R600MachineFunctionInfo * MFI = MF->getInfo(); - int64_t ReservedIndex = MI->getOperand(0).getImm(); - unsigned ReservedReg = - AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); - MFI->ReservedRegs.push_back(ReservedReg); - unsigned SuperReg = - AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4); - MFI->ReservedRegs.push_back(SuperReg); - break; - } - case AMDGPU::TXD: { unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); @@ -244,29 +232,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::input_perspective: { - R600MachineFunctionInfo *MFI = MF->getInfo(); - - // XXX Be more fine about register reservation - for (unsigned i = 0; i < 4; i ++) { - unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); - MFI->ReservedRegs.push_back(ReservedReg); - } - - switch (MI->getOperand(1).getImm()) { - case 0:// Perspective - MFI->HasPerspectiveInterpolation = true; - break; - case 1:// Linear - MFI->HasLinearInterpolation = true; - break; - default: - assert(0 && "Unknow ij index"); - } - - return BB; - } - case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: { // Instruction is left unmodified if its not the last one of its type @@ -421,38 +386,35 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); return CreateLiveInRegister(DAG, 
&AMDGPU::R600_TReg32RegClass, Reg, VT); } - case AMDGPUIntrinsic::R600_load_input_perspective: { - int slot = cast(Op.getOperand(1))->getZExtValue(); - if (slot < 0) - return DAG.getUNDEF(MVT::f32); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP, - DL, MVT::v4f32, - DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_linear: { - int slot = cast(Op.getOperand(1))->getZExtValue(); - if (slot < 0) - return DAG.getUNDEF(MVT::f32); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP, - DL, MVT::v4f32, - DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_constant: { + + case AMDGPUIntrinsic::R600_interp_input: { int slot = cast(Op.getOperand(1))->getZExtValue(); - if (slot < 0) - return DAG.getUNDEF(MVT::f32); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP_P0, - DL, MVT::v4f32, - DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + int ijb = cast(Op.getOperand(2))->getSExtValue(); + MachineSDNode *interp; + if (ijb < 0) { + interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, + MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); + return DAG.getTargetExtractSubreg( + TII->getRegisterInfo().getSubRegFromChannel(slot % 4), + DL, MVT::f32, SDValue(interp, 0)); + } + + if (slot % 4 < 2) + interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, + MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); + else + interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, + MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); + + return SDValue(interp, slot % 2); } case r600_read_ngroups_x: diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 1adb142..85859eb 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -104,7 +104,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { switch (Opcode) { default: return false; case AMDGPU::RETURN: - case AMDGPU::RESERVE_REG: return true; } } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 86ee0bb..bcbb5a1 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -480,13 +480,17 @@ def isR600toCayman : Predicate< // R600 SDNodes //===----------------------------------------------------------------------===// -def INTERP: SDNode<"AMDGPUISD::INTERP", - SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> - >; - -def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", - SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> - >; +def INTERP_PAIR_XY : AMDGPUShaderInst < + (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1), + (ins i32imm:$src0, R600_Reg32:$src1, 
R600_Reg32:$src2), + "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1", + []>; + +def INTERP_PAIR_ZW : AMDGPUShaderInst < + (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1), + (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", + []>; def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, @@ -497,21 +501,11 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", // Interpolation Instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { -def input_perspective : AMDGPUShaderInst < +def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), - (ins i32imm:$src0, i32imm:$src1), - "input_perspective $src0 $src1 : dst", - [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; -} // End usesCustomInserter = 1 - -def input_constant : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src), - "input_perspective $src : dst", - [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; - - + (ins i32imm:$src0), + "INTERP_LOAD $src0 : $dst", + []>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -1562,12 +1556,6 @@ def MASK_WRITE : AMDGPUShaderInst < } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -def RESERVE_REG : AMDGPUShaderInst < - (outs), - (ins i32imm:$src), - "RESERVE_REG $src", - [(int_AMDGPU_reserve_reg imm:$src)] ->; def TXD: AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index 9d416a6..284d4d8 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -12,15 +12,13 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "R600", isTarget = 1 in { - def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_R600_load_input_perspective : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_constant : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_linear : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_load_input : + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_R600_interp_input : + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_R600_store_swizzle : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_R600_store_stream_output : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_pixel_color : diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index bcb7f94..40aec83 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -13,21 +13,6 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo(), - HasLinearInterpolation(false), - HasPerspectiveInterpolation(false) { + : MachineFunctionInfo() { memset(Outputs, 0, sizeof(Outputs)); } - -unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { - assert(HasPerspectiveInterpolation); - return 0; -} - -unsigned R600MachineFunctionInfo::GetIJLinearIndex() const { - assert(HasLinearInterpolation); - if 
(HasPerspectiveInterpolation) - return 1; - else - return 0; -} diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 91f9de2..0cea211 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -23,14 +23,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo { public: R600MachineFunctionInfo(const MachineFunction &MF); - std::vector ReservedRegs; SDNode *Outputs[16]; - bool HasLinearInterpolation; - bool HasPerspectiveInterpolation; - - unsigned GetIJLinearIndex() const; - unsigned GetIJPerspectiveIndex() const; - }; } // End llvm namespace diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index 0441e4a..d46b3a3 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -28,7 +28,6 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const R600MachineFunctionInfo * MFI = MF.getInfo(); Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); @@ -44,11 +43,6 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::PRED_SEL_ZERO); Reserved.set(AMDGPU::PRED_SEL_ONE); - for (std::vector::const_iterator I = MFI->ReservedRegs.begin(), - E = MFI->ReservedRegs.end(); I != E; ++I) { - Reserved.set(*I); - } - return Reserved; } -- cgit v1.1 From cc38cad53cfebcdfc3b4fbdd924c2a92cd9dacc0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 5 Feb 2013 17:09:16 +0000 Subject: R600: Fold remaining CONST_COPY after expand pseudo inst Patch by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174395 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +- lib/Target/R600/R600LowerConstCopy.cpp | 170 +++++++++++++++++++++++++++++--- 2 files changed, 160 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 7b069e7..2185be3 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -136,8 +136,8 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); - addPass(createR600LowerConstCopy(*TM)); addPass(&FinalizeMachineBundlesID); + addPass(createR600LowerConstCopy(*TM)); } else { addPass(createSILowerLiteralConstantsPass(*TM)); addPass(createSILowerControlFlowPass(*TM)); diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp index 70a2b13..46f2aef 100644 --- a/lib/Target/R600/R600LowerConstCopy.cpp +++ b/lib/Target/R600/R600LowerConstCopy.cpp @@ -13,7 +13,6 @@ /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try /// to fold them if possible or replace them by MOV otherwise. -/// TODO : Implement the folding part, using Copy Propagation algorithm. 
// //===----------------------------------------------------------------------===// @@ -30,6 +29,13 @@ class R600LowerConstCopy : public MachineFunctionPass { private: static char ID; const R600InstrInfo *TII; + + struct ConstPairs { + unsigned XYPair; + unsigned ZWPair; + }; + + bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const; public: R600LowerConstCopy(TargetMachine &tm); virtual bool runOnMachineFunction(MachineFunction &MF); @@ -39,27 +45,169 @@ public: char R600LowerConstCopy::ID = 0; - R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : MachineFunctionPass(ID), TII (static_cast(tm.getInstrInfo())) { } +bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst, + unsigned ReadConst) const { + unsigned ReadConstChan = ReadConst & 3; + unsigned ReadConstIndex = ReadConst & (~3); + if (ReadConstChan < 2) { + if (!UsedConst.XYPair) { + UsedConst.XYPair = ReadConstIndex; + } + return UsedConst.XYPair == ReadConstIndex; + } else { + if (!UsedConst.ZWPair) { + UsedConst.ZWPair = ReadConstIndex; + } + return UsedConst.ZWPair == ReadConstIndex; + } +} + +static bool isControlFlow(const MachineInstr &MI) { + return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) || + (MI.getOpcode() == AMDGPU::ENDIF) || + (MI.getOpcode() == AMDGPU::ELSE) || + (MI.getOpcode() == AMDGPU::WHILELOOP) || + (MI.getOpcode() == AMDGPU::BREAK); +} + bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E;) { - MachineInstr &MI = *I; - I = llvm::next(I); - if (MI.getOpcode() != AMDGPU::CONST_COPY) + DenseMap RegToConstIndex; + for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), + E = MBB.instr_end(); I != E;) { + + if (I->getOpcode() == AMDGPU::CONST_COPY) { + MachineInstr &MI = *I; + I = llvm::next(I); + unsigned DstReg = MI.getOperand(0).getReg(); + DenseMap::iterator SrcMI = + RegToConstIndex.find(DstReg); + if (SrcMI != RegToConstIndex.end()) { + SrcMI->second->eraseFromParent(); + RegToConstIndex.erase(SrcMI); + } + MachineInstr *NewMI = + TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV, + MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); + TII->setImmOperand(NewMI, R600Operands::SRC0_SEL, + MI.getOperand(1).getImm()); + RegToConstIndex[DstReg] = NewMI; + MI.eraseFromParent(); continue; - MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, - MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); - NewMI->getOperand(9).setImm(MI.getOperand(1).getImm()); - MI.eraseFromParent(); + } + + std::vector Defs; + // We consider all Instructions as bundled because algorithm that handle + // const read port limitations inside an IG is still valid with single + // instructions. 
+ std::vector Bundle;
+
+ if (I->isBundle()) {
+ unsigned BundleSize = I->getBundleSize();
+ for (unsigned i = 0; i < BundleSize; i++) {
+ I = llvm::next(I);
+ Bundle.push_back(I);
+ }
+ } else if (TII->isALUInstr(I->getOpcode())){
+ Bundle.push_back(I);
+ } else if (isControlFlow(*I)) {
+ RegToConstIndex.clear();
+ I = llvm::next(I);
+ continue;
+ } else {
+ MachineInstr &MI = *I;
+ for (MachineInstr::mop_iterator MOp = MI.operands_begin(),
+ MOpE = MI.operands_end(); MOp != MOpE; ++MOp) {
+ MachineOperand &MO = *MOp;
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(MO.getReg());
+ } else {
+ // Either a TEX or an Export inst, prevent from erasing def of used
+ // operand
+ RegToConstIndex.erase(MO.getReg());
+ for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo());
+ SR.isValid(); ++SR) {
+ RegToConstIndex.erase(*SR);
+ }
+ }
+ }
+ }
+
+
+ R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ for(std::vector::iterator It = Bundle.begin(),
+ ItE = Bundle.end(); It != ItE; ++It) {
+ MachineInstr *MI = *It;
+ if (TII->isPredicated(MI)) {
+ // We don't want to erase previous assignment
+ RegToConstIndex.erase(MI->getOperand(0).getReg());
+ } else {
+ int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE);
+ if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm())
+ Defs.push_back(MI->getOperand(0).getReg());
+ }
+ }
+
+ ConstPairs CP = {0,0};
+ for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) {
+ for(std::vector::iterator It = Bundle.begin(),
+ ItE = Bundle.end(); It != ItE; ++It) {
+ MachineInstr *MI = *It;
+ int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]);
+ if (SrcIdx < 0)
+ continue;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ DenseMap::iterator SrcMI =
+ RegToConstIndex.find(MO.getReg());
+ if (SrcMI != RegToConstIndex.end()) {
+ MachineInstr *CstMov = SrcMI->second;
+ int ConstMovSel =
+ TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL);
+ unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm();
+ if (canFoldInBundle(CP, ConstIndex)) {
+ TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
+ MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
+ } else {
+ RegToConstIndex.erase(SrcMI);
+ }
+ }
+ }
+ }
+
+ for (std::vector::iterator It = Defs.begin(), ItE = Defs.end();
+ It != ItE; ++It) {
+ DenseMap::iterator SrcMI =
+ RegToConstIndex.find(*It);
+ if (SrcMI != RegToConstIndex.end()) {
+ SrcMI->second->eraseFromParent();
+ RegToConstIndex.erase(SrcMI);
+ }
+ }
+ I = llvm::next(I);
+ }
+
+ if (MBB.succ_empty()) {
+ for (DenseMap::iterator
+ DI = RegToConstIndex.begin(), DE = RegToConstIndex.end();
+ DI != DE; ++DI) {
+ DI->second->eraseFromParent();
+ }
 }
 }
 return false;
-- cgit v1.1


From 0a9d1d31e9b3239b1dc38a67d31810c4bb405d0a Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Tue, 5 Feb 2013 17:40:36 +0000
Subject: Avoid using MRI::liveout_iterator for computing VRSAVEs.

The liveout lists are about to be removed from MRI; this is the only place
they were used after register allocation.

Get the live-out V registers directly from the return instructions instead.
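In effect, the computation now walks each block that ends in a return and
reads the live-out registers straight off the return's operands. A minimal
standalone sketch of that scan, assuming the MachineFunction API of this
period (the patch below additionally filters for PPC::VRRCRegClass and
clears bits in UsedRegMask rather than collecting a set):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/ADT/SmallSet.h"

// Collect registers that are live out of MF by reading them off the
// return instructions, where they now appear as (implicit) uses.
static void collectLiveOuts(const llvm::MachineFunction &MF,
                            llvm::SmallSet<unsigned, 8> &LiveOuts) {
  for (llvm::MachineFunction::const_iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    if (BI->empty() || !BI->back().isReturn())
      continue;                          // Only blocks ending in a return.
    const llvm::MachineInstr &Ret = BI->back();
    for (unsigned i = 0, e = Ret.getNumOperands(); i != e; ++i) {
      const llvm::MachineOperand &MO = Ret.getOperand(i);
      if (MO.isReg() && MO.isUse())      // Live-outs are uses on the return.
        LiveOuts.insert(MO.getReg());
    }
  }
}

Because the registers ride on the instruction itself, the scan needs no
MachineRegisterInfo side state at all.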
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 5901f36..9948d61 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -119,12 +119,21 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(*I); - if (VRRegNo[RegNo] == *I) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + + // Live out registers appear as use operands on return instructions. + for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); + UsedRegMask != 0 && BI != BE; ++BI) { + const MachineBasicBlock &MBB = *BI; + if (MBB.empty() || !MBB.back().isReturn()) + continue; + const MachineInstr &Ret = MBB.back(); + for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Ret.getOperand(I); + if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) + continue; + unsigned RegNo = getPPCRegisterNumbering(MO.getReg()); + UsedRegMask &= ~(1 << (31-RegNo)); + } } // If no registers are used, turn this into a copy. -- cgit v1.1 From a499d2bcef0c1001c60d752d356e50eed2402ca8 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 17:53:52 +0000 Subject: Don't use MRI liveouts in R600. Something very strange is going on with the output registers in this target. Its ISelLowering code is inserting dangling CopyToReg nodes, hoping that those physregs won't get clobbered before the RETURN. This patch adds the output registers as implicit uses on RETURN instructions in the custom emission pass. I'd much prefer to have those CopyToReg nodes glued to the RETURNs, but I don't see how. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174400 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 15 +++++++++++---- lib/Target/R600/R600Instructions.td | 3 ++- lib/Target/R600/R600MachineFunctionInfo.h | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 8fe31e0..110dcc1 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -266,6 +266,15 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addImm(EOP); break; } + case AMDGPU::RETURN: { + // RETURN instructions must have the live-out registers as implicit uses, + // otherwise they appear dead. 
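+    // Without these implicit uses nothing in the function appears to read
+    // the output registers, so the dangling CopyToReg values feeding them
+    // could be treated as dead or get clobbered before the RETURN.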
+ R600MachineFunctionInfo *MFI = MF->getInfo(); + MachineInstrBuilder MIB(*MF, MI); + for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i) + MIB.addReg(MFI->LiveOuts[i], RegState::Implicit); + return BB; + } } MI->eraseFromParent(); @@ -348,12 +357,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const switch (IntrinsicID) { case AMDGPUIntrinsic::AMDGPU_store_output: { MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); + R600MachineFunctionInfo *MFI = MF.getInfo(); int64_t RegIndex = cast(Op.getOperand(3))->getZExtValue(); unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); - if (!MRI.isLiveOut(Reg)) { - MRI.addLiveOut(Reg); - } + MFI->LiveOuts.push_back(Reg); return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); } case AMDGPUIntrinsic::R600_store_pixel_color: { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index bcbb5a1..f935313 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1580,7 +1580,8 @@ def FNEG_R600 : FNEG; //===---------------------------------------------------------------------===// // Return instruction //===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, + usesCustomInserter = 1 in { def RETURN : ILFormat<(outs), (ins variable_ops), "RETURN", [(IL_retflag)]>; } diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 0cea211..ad7b4da 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -23,6 +23,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo { public: R600MachineFunctionInfo(const MachineFunction &MF); + SmallVector LiveOuts; SDNode *Outputs[16]; }; -- cgit v1.1 From f918d7fd7393049bc87bc03fda2d2cd3cec1dacb Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Tue, 5 Feb 2013 17:55:27 +0000 Subject: [MC] Bundle alignment: Invalidate relaxed fragments Currently, when a fragment is relaxed, its size is modified, but its offset is not (it gets laid out as a side effect of checking whether it needs relaxation), then all subsequent fragments are invalidated because their offsets need to change. When bundling is enabled, relaxed fragments need to get laid out again, because the increase in size may push it over a bundle boundary. So instead of only invalidating subsequent fragments, also invalidate the fragment that gets relaxed, which causes it to get laid out again. This patch also fixes some trailing whitespace and fixes the bundling-related debug output of MCFragments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174401 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAssembler.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 498fbf7..fd281e6 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -82,14 +82,15 @@ bool MCAsmLayout::isFragmentValid(const MCFragment *F) const { return F->getLayoutOrder() <= LastValid->getLayoutOrder(); } -void MCAsmLayout::invalidateFragmentsAfter(MCFragment *F) { +void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) { // If this fragment wasn't already valid, we don't need to do anything. 
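  // (Invalidating from F, rather than only after it, ensures a relaxed
  // fragment is itself laid out again: with aligned bundling, the size
  // increase from relaxation can push F across a bundle boundary and
  // change its own required padding, not just later fragments' offsets.)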
if (!isFragmentValid(F)) return; - // Otherwise, reset the last valid fragment to this fragment. + // Otherwise, reset the last valid fragment to the previous fragment + // (if this is the first fragment, it will be NULL). const MCSectionData &SD = *F->getParent(); - LastValidFragment[&SD] = F; + LastValidFragment[&SD] = F->getPrevNode(); } void MCAsmLayout::ensureValid(const MCFragment *F) const { @@ -165,14 +166,14 @@ uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F, uint64_t FOffset, uint64_t FSize) { uint64_t BundleSize = Assembler.getBundleAlignSize(); - assert(BundleSize > 0 && + assert(BundleSize > 0 && "computeBundlePadding should only be called if bundling is enabled"); uint64_t BundleMask = BundleSize - 1; uint64_t OffsetInBundle = FOffset & BundleMask; uint64_t EndOfFragment = OffsetInBundle + FSize; // There are two kinds of bundling restrictions: - // + // // 1) For alignToBundleEnd(), add padding to ensure that the fragment will // *end* on a bundle boundary. // 2) Otherwise, check if the fragment would cross a bundle boundary. If it @@ -939,7 +940,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) { FirstRelaxedFragment = I; } if (FirstRelaxedFragment) { - Layout.invalidateFragmentsAfter(FirstRelaxedFragment); + Layout.invalidateFragmentsFrom(FirstRelaxedFragment); return true; } return false; @@ -999,7 +1000,7 @@ void MCFragment::dump() { OS << ""; + << " BundlePadding:" << static_cast(getBundlePadding()) << ">"; switch (getKind()) { case MCFragment::FT_Align: { -- cgit v1.1 From c3afc760e1a49f29634b7442a3d38bc88a1f113e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 17:59:48 +0000 Subject: Move MRI liveouts to X86 return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174402 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 14 ++++++++++---- lib/Target/X86/X86ISelLowering.cpp | 10 ++-------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5facb7b..b6c1512 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -738,6 +738,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (F.isVarArg()) return false; + // Build a list of return value registers. + SmallVector RetRegs; + if (Ret->getNumOperands() > 0) { SmallVector Outs; GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); @@ -805,8 +808,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - // Mark the register as live out of the function. - MRI.addLiveOut(VA.getLocReg()); + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); } // The x86-64 ABI for returning structs by value requires that we copy @@ -819,11 +822,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { "SRetReturnReg should have been set in LowerFormalArguments()!"); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), X86::RAX).addReg(Reg); - MRI.addLiveOut(X86::RAX); + RetRegs.push_back(X86::RAX); } // Now emit the RET. 
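  // The RET carries its return value registers as implicit uses, so the
  // copies emitted above stay live without the old MRI liveout markings.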
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); return true; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4962023..c24d41b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1590,14 +1590,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); - // Add the regs to the liveout set for the function. - MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) - MRI.addLiveOut(RVLocs[i].getLocReg()); - SDValue Flag; - SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop @@ -1666,6 +1659,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // The x86-64 ABIs require that for returning structs by value we copy @@ -1686,7 +1680,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. - MRI.addLiveOut(RetValReg); + RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64)); } RetOps[0] = Chain; // Update chain. -- cgit v1.1 From fc7432744476281511704c7e07bf89e20c215601 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:08:40 +0000 Subject: Move MRI liveouts to ARM return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174406 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 14 ++++++++++---- lib/Target/ARM/ARMISelLowering.cpp | 24 +++++++++++------------- lib/Target/ARM/ARMInstrInfo.td | 2 +- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a2d0cde..4b7978a 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2099,6 +2099,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + // Build a list of return value registers. + SmallVector RetRegs; + CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector Outs; @@ -2157,13 +2160,16 @@ bool ARMFastISel::SelectRet(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - // Mark the register as live out of the function. - MRI.addLiveOut(VA.getLocReg()); + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); } unsigned RetOpc = isThumb2 ? 
ARM::tBX_RET : ARM::BX_RET; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(RetOpc))); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(RetOpc)); + AddOptionalDefs(MIB); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); return true; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 82b475a..4c9d2da 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1928,15 +1928,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, isVarArg)); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps; + RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -1965,10 +1959,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(1), Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. @@ -1981,6 +1977,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), Flag); @@ -1990,15 +1987,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - SDValue result; + // Update chain and glue. 
+ RetOps[0] = Chain; if (Flag.getNode()) - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else // Return Void - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); - return result; + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, + RetOps.data(), RetOps.size()); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e31c479..c938c41 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -117,7 +117,7 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; -- cgit v1.1 From 87b87ad8fb8671efb5577dbfd40c02248862cc8d Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:08:43 +0000 Subject: Move MRI liveouts to Hexagon return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 19 +++++++++---------- lib/Target/Hexagon/HexagonInstrInfo.td | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 9c7243b..857b15f 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -304,15 +304,9 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // Analyze return values of ISD::RET CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); + // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; @@ -321,12 +315,17 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + RetOps.push_back(Flag); - return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 6caab26..ac2dd22 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -808,7 +808,7 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], // JR + //===----------------------------------------------------------------------===// def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; // Jump to address from register. 
let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1, -- cgit v1.1 From f02138e6ec475dc0a3c0e6f664a068db3dc2c214 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:08:45 +0000 Subject: Move MRI liveouts to MBlaze return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174408 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MBlaze/MBlazeISelLowering.cpp | 32 +++++++++++++++----------------- lib/Target/MBlaze/MBlazeInstrInfo.td | 4 ++-- 2 files changed, 17 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 5b3c6fe..7664c60 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -1028,15 +1028,17 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); + + // If this function is using the interrupt_handler calling convention + // then use "rtid r14, 0" otherwise use "rtsd r15, 8" + unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet + : MBlazeISD::Ret; + unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14 + : MBlaze::R15; + RetOps.push_back(DAG.getRegister(Reg, MVT::i32)); + // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1049,20 +1051,16 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // guarantee that all emitted copies are // stuck together, avoiding something bad Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // If this function is using the interrupt_handler calling convention - // then use "rtid r14, 0" otherwise use "rtsd r15, 8" - unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet - : MBlazeISD::Ret; - unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14 - : MBlaze::R15; - SDValue DReg = DAG.getRegister(Reg, MVT::i32); + RetOps[0] = Chain; // Update chain. + // Add the flag if we have it. 
if (Flag.getNode()) - return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag); + RetOps.push_back(Flag); - return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg); + return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 139bf71..f86bc0b 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -28,9 +28,9 @@ def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; //===----------------------------------------------------------------------===// def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue, -- cgit v1.1 From 6ab5061a2c01b68c6e47c55d4cfe29b3e3b29963 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:12:00 +0000 Subject: Move MRI liveouts to PowerPC return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 5631c93..af89bc9 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3341,17 +3341,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // Emit tail call. if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - assert(((Callee.getOpcode() == ISD::Register && cast(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || @@ -4417,14 +4406,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -4449,12 +4432,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. 
if (Flag.getNode()) - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); + + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, -- cgit v1.1 From d07359667118ab1e889c3b9163b5e6a12414c38b Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:12:03 +0000 Subject: Move MRI liveouts to Mips return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174410 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 25 ++++++++++--------------- lib/Target/Mips/MipsInstrInfo.td | 3 ++- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index cf8ed35..ff0064e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3454,15 +3454,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Mips); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -3471,9 +3464,9 @@ MipsTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); - // guarantee that all emitted copies are - // stuck together, avoiding something bad + // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // The mips ABIs for returning structs by value requires that we copy @@ -3492,15 +3485,17 @@ MipsTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag); Flag = Chain.getValue(1); - MF.getRegInfo().addLiveOut(V0); + RetOps.push_back(DAG.getRegister(V0, getPointerTy())); } - // Return on Mips is always a "jr $ra" + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag); + RetOps.push_back(Flag); - // Return Void - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain); + // Return on Mips is always a "jr $ra" + return DAG.getNode(MipsISD::Ret, dl, MVT::Other, &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b563b8f..f9e3af5 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -72,7 +72,8 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>; def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>; // Return -def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; +def MipsRet : SDNode<"MipsISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, -- cgit v1.1 From 294014e1585ef5e0c1bf17a9cf79039ce662b64f Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:12:06 +0000 Subject: Move MRI liveouts to MSP430 return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174411 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430ISelLowering.cpp | 18 +++++++----------- lib/Target/MSP430/MSP430InstrInfo.td | 4 ++-- 2 files changed, 9 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5a156c1..09cdf32 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -423,15 +423,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_MSP430); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -444,16 +437,19 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together, // avoiding something bad. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } unsigned Opc = (CallConv == CallingConv::MSP430_INTR ? MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG); + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag); + RetOps.push_back(Flag); - // Return Void - return DAG.getNode(Opc, dl, MVT::Other, Chain); + return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size()); } /// LowerCCCCallTo - functions arguments are copied from virtual regs to diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index f003574..e45780d 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -40,9 +40,9 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, // MSP430 Specific Node Definitions. //===----------------------------------------------------------------------===// def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>; def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>; -- cgit v1.1 From 691c365aad62779c4fec9f06dbd57be31e9446f8 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 5 Feb 2013 18:15:34 +0000 Subject: Hexagon: Use multiclass for absolute addressing mode stores. 
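Each defm below expands into the same grid of opcodes the removed
hand-written definitions spelled out one by one: an unpredicated form, four
predicated forms (true/false sense, each optionally with a .new predicate),
and, where the store is new-value capable, a parallel _nv family. A
standalone model of the expansion (illustrative only; TableGen derives the
real names from the defm prefixes at build time, and STrid_abs, with
isNVStorable = 0, gets no _nv column):

#include <cstdio>

int main() {
  const char *Bases[] = { "STrib_abs", "STrih_abs", "STriw_abs" };
  const char *Preds[] = { "_cPt", "_cNotPt", "_cdnPt", "_cdnNotPt" };
  for (unsigned b = 0; b != 3; ++b) {
    // Unpredicated store and its new-value twin.
    std::printf("%s_V4  %s_nv_V4\n", Bases[b], Bases[b]);
    // Predicated variants, normal and new-value.
    for (unsigned p = 0; p != 4; ++p)
      std::printf("%s%s_V4  %s%s_nv_V4\n",
                  Bases[b], Preds[p], Bases[b], Preds[p]);
  }
  return 0;
}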
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174412 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfoV4.td | 172 +++++++++++++------------------ 1 file changed, 70 insertions(+), 102 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index f7b6a9d..53f1189 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -4040,143 +4040,111 @@ let isReturn = 1, isTerminator = 1, Requires<[HasV4T]>; } - // Load/Store with absolute addressing mode // memw(#u6)=Rt -multiclass ST_abs { - let isPredicable = 1 in - def _abs_V4 : STInst2<(outs), - (ins globaladdress:$absaddr, IntRegs:$src), - !strconcat(OpcStr, "(##$absaddr) = $src"), - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in - def _abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in - def _abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), +multiclass ST_Abs_Predbase { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"(##$absaddr) = $src2", []>, Requires<[HasV4T]>; +} - let isPredicated = 1 in - def _abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), - []>, - Requires<[HasV4T]>; +multiclass ST_Abs_Pred { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Abs_Predbase; + // Predicate new + defm _cdn#NAME : ST_Abs_Predbase; + } +} - let isPredicated = 1 in - def _abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), +let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in +multiclass ST_Abs { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins globaladdressExt:$absaddr, RC:$src), + mnemonic#"(##$absaddr) = $src", []>, Requires<[HasV4T]>; - def _abs_nv_V4 : STInst2<(outs), - (ins globaladdress:$absaddr, IntRegs:$src), - !strconcat(OpcStr, "(##$absaddr) = $src.new"), - []>, - Requires<[HasV4T]>; + let opExtendable = 1, isPredicated = 1 in { + defm Pt : ST_Abs_Pred; + defm NotPt : ST_Abs_Pred; + } + } +} - let isPredicated = 1 in - def _abs_cPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), +multiclass ST_Abs_Predbase_nv { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"(##$absaddr) = $src2.new", []>, Requires<[HasV4T]>; +} - let isPredicated = 1 in - def _abs_cNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), - []>, - Requires<[HasV4T]>; +multiclass 
ST_Abs_Pred_nv { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Abs_Predbase_nv; + // Predicate new + defm _cdn#NAME : ST_Abs_Predbase_nv; + } +} - let isPredicated = 1 in - def _abs_cdnPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), +let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in +multiclass ST_Abs_nv { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins globaladdressExt:$absaddr, RC:$src), + mnemonic#"(##$absaddr) = $src.new", []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), - []>, - Requires<[HasV4T]>; + let opExtendable = 1, isPredicated = 1 in { + defm Pt : ST_Abs_Pred_nv; + defm NotPt : ST_Abs_Pred_nv; + } + } } -let AddedComplexity = 30, isPredicable = 1 in -def STrid_abs_V4 : STInst<(outs), - (ins globaladdress:$absaddr, DoubleRegs:$src), - "memd(##$absaddr) = $src", - [(store (i64 DoubleRegs:$src), - (HexagonCONST32 tglobaladdr:$absaddr))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if ($src1) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if (!$src1) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; +let addrMode = Absolute in { + defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>, + ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel; -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if ($src1.new) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; + defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>, + ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel; -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if (!$src1.new) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; + defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>, + ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel; -defm STrib : ST_abs<"memb">; -defm STrih : ST_abs<"memh">; -defm STriw : ST_abs<"memw">; + let isNVStorable = 0 in + defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel; +} -let Predicates = [HasV4T], AddedComplexity = 30 in +let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(truncstorei8 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in def : Pat<(truncstorei16 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; +def : Pat<(store (i64 DoubleRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrid_abs_V4 tglobaladdr: $absaddr, 
DoubleRegs: $src1)>; +} multiclass LD_abs { let isPredicable = 1 in -- cgit v1.1 From 067e5a2a1a3ed35bce7dc9e9b2eedee501db53c9 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:16:58 +0000 Subject: Move MRI liveouts to Sparc return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174413 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcISelLowering.cpp | 27 ++++++++++++--------------- lib/Target/Sparc/SparcInstrInfo.td | 2 +- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 7664abd..138b92d 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -95,15 +95,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (MF.getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); + // Make room for the return address offset. + RetOps.push_back(SDValue()); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -115,6 +110,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } unsigned RetAddrOffset = 8; //Call Inst + Delay Slot @@ -127,18 +123,19 @@ SparcTargetLowering::LowerReturn(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag); Flag = Chain.getValue(1); - if (MF.getRegInfo().liveout_empty()) - MF.getRegInfo().addLiveOut(SP::I0); + RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } - SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32); + RetOps[0] = Chain; // Update chain. + RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32); + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, - RetAddrOffsetNode, Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, - RetAddrOffsetNode); + RetOps.push_back(Flag); + + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } /// LowerFormalArguments - V8 uses a very simple ABI, where all values are diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index e64c140..90b698d 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -126,7 +126,7 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; -- cgit v1.1 From 0873bc84190df92b474cb92ec4a7e36fbcb610f9 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:21:46 +0000 Subject: Move MRI liveouts to XCore return instructions. 
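By this point the conversion has a fixed shape: start RetOps with the chain,
emit a glued CopyToReg per return value and push that register as an
operand, then append the glue. A hedged sketch of the common pattern,
assuming the SelectionDAG API of this period; RetOpcode stands in for the
target's return node (XCoreISD::RETSP below, SPISD::RET_FLAG above), and
some targets insert extra fixed operands, such as XCore's retsp immediate,
right after the chain:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Hypothetical helper distilling the repeated LowerReturn tail.
static SDValue buildReturn(SelectionDAG &DAG, DebugLoc dl, unsigned RetOpcode,
                           SDValue Chain,
                           const SmallVectorImpl<CCValAssign> &RVLocs,
                           const SmallVectorImpl<SDValue> &OutVals) {
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps(1, Chain);    // Operand #0 = chain.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    const CCValAssign &VA = RVLocs[i];
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
    Flag = Chain.getValue(1);                  // Keep the copies glued.
    // The result register is now an operand of the return node itself,
    // replacing the old MachineRegisterInfo::addLiveOut bookkeeping.
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }
  RetOps[0] = Chain;                           // Update chain.
  if (Flag.getNode())
    RetOps.push_back(Flag);                    // Glue, if any copies exist.
  return DAG.getNode(RetOpcode, dl, MVT::Other, &RetOps[0], RetOps.size());
}

Carrying the registers as operands, rather than in a side table, lets every
later pass recover liveness locally from the return instruction.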
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelLowering.cpp | 26 ++++++++++++-------------- lib/Target/XCore/XCoreInstrInfo.td | 4 ++-- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 59be84a..541dd2f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1249,15 +1249,11 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); + + // Return on XCore is always a "retsp 0" + RetOps.push_back(DAG.getConstant(0, MVT::i32)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1270,15 +1266,17 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // guarantee that all emitted copies are // stuck together, avoiding something bad Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // Return on XCore is always a "retsp 0" + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - Chain, DAG.getConstant(0, MVT::i32), Flag); - else // Return Void - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - Chain, DAG.getConstant(0, MVT::i32)); + RetOps.push_back(Flag); + + return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, + &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index befc096..c23f874 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -32,8 +32,8 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, - [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>; +def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, + [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad, SDNPVariadic]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -- cgit v1.1 From baa3c50a7bb0ddb0397b71b732c52b19cb700116 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:21:49 +0000 Subject: Move MRI liveouts to AArch64 return instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 23 ++++++++++------------- lib/Target/AArch64/AArch64InstrInfo.td | 3 ++- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 652f129..e2e472f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1035,15 +1035,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, // Analyze outgoing return values. 
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector RetOps(1, Chain); for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { // PCS: "If the type, T, of the result of a function is such that @@ -1087,13 +1080,17 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - if (Flag.getNode()) { - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain, Flag); - } else { - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain); - } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, + &RetOps[0], RetOps.size()); } SDValue diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 673e051..03cae93 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -6,7 +6,8 @@ include "AArch64InstrFormats.td" def SDT_A64ret : SDTypeProfile<0, 0, []>; def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, - SDNPOptInGlue]>; + SDNPOptInGlue, + SDNPVariadic]>; // (ins NZCV, Condition, Dest) def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; -- cgit v1.1 From b45e4deb102d47602f5b941da7f412ecc9a867e9 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:21:52 +0000 Subject: Remove special-casing of return blocks for liveness. Now that return value registers are return instruction uses, there is no need for special treatment of return blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174416 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 18 +--------- lib/CodeGen/CriticalAntiDepBreaker.cpp | 18 +--------- lib/CodeGen/DeadMachineInstructionElim.cpp | 9 ----- lib/CodeGen/LiveVariables.cpp | 23 ------------ lib/CodeGen/PostRASchedulerList.cpp | 24 +++---------- lib/CodeGen/RegAllocFast.cpp | 57 ------------------------------ 6 files changed, 7 insertions(+), 142 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 152d9fa..c50f8b5 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -151,23 +151,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { std::vector &KillIndices = State->GetKillIndices(); std::vector &DefIndices = State->GetDefIndices(); - // Determine the live-out physregs for this block. - if (IsReturnBlock) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { - unsigned Reg = *AI; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - } - } - } - - // In a non-return block, examine the live-in regs of all successors. - // Note a return block can have successors if the return instruction is - // predicated. 
+ // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index ee31dde..0eb74a4 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -57,23 +57,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); - // Determine the live-out physregs for this block. - if (IsReturnBlock) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { - unsigned Reg = *AI; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BBSize; - DefIndices[Reg] = ~0u; - } - } - } - - // In a non-return block, examine the live-in regs of all successors. - // Note a return block can have successors if the return instruction is - // predicated. + // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index a526d24..a54217f 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -99,15 +99,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Start out assuming that reserved registers are live out of this block. LivePhysRegs = MRI->getReservedRegs(); - // Also add any explicit live-out physregs for this block. - if (!MBB->empty() && MBB->back().isReturn()) - for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), - LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { - unsigned Reg = *LOI; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - LivePhysRegs.set(Reg); - } - // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not // live across blocks, but some targets (x86) can have flags live out of a // block. diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index f81ad1c..789eddc 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -619,29 +619,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MBB); } - // Finally, if the last instruction in the block is a return, make sure to - // mark it as using all of the live-out values in the function. - // Things marked both call and return are tail calls; do not do this for - // them. The tail callee need not take the same registers as input - // that it produces as output, and there are dependencies for its input - // registers elsewhere. - if (!MBB->empty() && MBB->back().isReturn() - && !MBB->back().isCall()) { - MachineInstr *Ret = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - assert(TargetRegisterInfo::isPhysicalRegister(*I) && - "Cannot have a live-out virtual register!"); - HandlePhysRegUse(*I, Ret); - - // Add live-out registers as implicit uses. 
- if (!Ret->readsRegister(*I)) - Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); - } - } - // MachineCSE may CSE instructions which write to non-allocatable physical // registers across MBBs. Remember if any reserved register is liveout. SmallSet LiveOuts; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 488f1d4..53fe273 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -418,11 +418,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { // Start with no live registers. LiveRegs.reset(); - // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().isReturn()) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. @@ -430,20 +430,6 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { LiveRegs.set(*SubRegs); } } - else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - LiveRegs.set(Reg); - // Repeat, for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } - } } bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8d849dc..840ddb1 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -177,7 +177,6 @@ namespace { unsigned VirtReg, unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); - void addRetOperands(MachineBasicBlock *MBB); }; char RAFast::ID = 0; } @@ -774,59 +773,6 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.insert(PartialDefs[i]); } -/// addRetOperand - ensure that a return instruction has an operand for each -/// value live out of the function. -/// -/// Things marked both call and return are tail calls; do not do this for them. -/// The tail callee need not take the same registers as input that it produces -/// as output, and there are dependencies for its input registers elsewhere. -/// -/// FIXME: This should be done as part of instruction selection, and this helper -/// should be deleted. Until then, we use custom logic here to create the proper -/// operand under all circumstances. We can't use addRegisterKilled because that -/// doesn't make sense for undefined values. We can't simply avoid calling it -/// for undefined values, because we must ensure that the operand always exists. 
-void RAFast::addRetOperands(MachineBasicBlock *MBB) { - if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall()) - return; - - MachineInstr *MI = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MBB->getParent()->getRegInfo().liveout_begin(), - E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) { - unsigned Reg = *I; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Cannot have a live-out virtual register."); - - bool hasDef = PhysRegState[Reg] == regReserved; - - // Check if this register already has an operand. - bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - - unsigned OperReg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(OperReg)) - continue; - - if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) { - // If the ret already has an operand for this physreg or a superset, - // don't duplicate it. Set the kill flag if the value is defined. - if (hasDef && !MO.isKill()) - MO.setIsKill(); - Found = true; - break; - } - } - if (!Found) - MachineInstrBuilder(*MF, MI) - .addReg(Reg, llvm::RegState::Implicit | getKillRegState(hasDef)); - } -} - void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); @@ -1109,9 +1055,6 @@ void RAFast::AllocateBasicBlock() { MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); - // addRetOperands must run after we've seen all defs in this block. - addRetOperands(MBB); - DEBUG(MBB->dump()); } -- cgit v1.1 From e6dc59891fc53d65b3f6d19772d26e23e0cc1cac Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 5 Feb 2013 18:21:56 +0000 Subject: Remove liveout lists from MachineRegisterInfo. All targets are now adding return value registers as implicit uses on return instructions, and there is no longer a need for the live out lists. 
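Code that previously asked MachineRegisterInfo whether a register was live out
can recover the same fact from the return instruction's operands. A hedged
sketch of the replacement query; the helper name isReturnedInReg is invented
for illustration:

    // Hypothetical helper: does the block's return instruction read Reg,
    // i.e. is a value returned in it?
    static bool isReturnedInReg(const MachineBasicBlock &MBB, unsigned Reg) {
      if (MBB.empty() || !MBB.back().isReturn())
        return false;
      return MBB.back().readsRegister(Reg);
    }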
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174417 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineFunction.cpp     | 7 -------
 lib/CodeGen/MachineInstr.cpp        | 4 ++--
 lib/CodeGen/MachineRegisterInfo.cpp | 7 -------
 3 files changed, 2 insertions(+), 16 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 4a9a62a..1898222 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -346,13 +346,6 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
     }
     OS << '\n';
   }
-  if (RegInfo && !RegInfo->liveout_empty()) {
-    OS << "Function Live Outs:";
-    for (MachineRegisterInfo::liveout_iterator
-         I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
-      OS << ' ' << PrintReg(*I, TRI);
-    OS << '\n';
-  }

   for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
     OS << '\n';
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index d8b5fd4..32d0668 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1515,12 +1515,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
       unsigned Reg = MO.getReg();
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
         const MachineRegisterInfo &MRI = MF->getRegInfo();
-        if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+        if (MRI.use_empty(Reg)) {
           bool HasAliasLive = false;
           for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
                AI.isValid(); ++AI) {
             unsigned AliasReg = *AI;
-            if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+            if (!MRI.use_empty(AliasReg)) {
               HasAliasLive = true;
               break;
             }
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 21877e5..a777f52 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -283,13 +283,6 @@ bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
   return false;
 }

-bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
-  for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
-    if (*I == Reg)
-      return true;
-  return false;
-}
-
 /// getLiveInPhysReg - If VReg is a live-in virtual register, return the
 /// corresponding live-in physical register.
 unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
-- cgit v1.1

From 1e45487dfde9de304f28e1fb49b3c15a036b9c00 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 5 Feb 2013 18:23:10 +0000
Subject: [SjLj Prepare] When demoting an invoke instruction to the stack, if
 the normal edge is critical, then split it so we can insert the store.
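The failure case: when the invoke's normal destination has other predecessors,
the normal edge is critical, so no existing block runs only on the non-unwind
path and the spill store has nowhere safe to go. Splitting the edge
manufactures such a block. The shape of the fix, paraphrasing the patch below:

    unsigned SuccNum = GetSuccessorNumber(II.getParent(), II.getNormalDest());
    TerminatorInst *TI = II.getParent()->getTerminator();
    if (isCriticalEdge(TI, SuccNum)) {
      // The new block lies on the normal edge only, so a store placed here
      // cannot execute when the invoke unwinds.
      BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
      InsertPt = BB->getFirstInsertionPt();
    }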
rdar://13126179 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174418 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/DemoteRegToStack.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index d5c41f5..db525cd 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Function.h" @@ -78,12 +79,21 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, InsertPt = &I; ++InsertPt; } else { - // We cannot demote invoke instructions to the stack if their normal edge - // is critical. InvokeInst &II = cast(I); - assert(II.getNormalDest()->getSinglePredecessor() && - "Cannot demote invoke with a critical successor!"); - InsertPt = II.getNormalDest()->begin(); + if (II.getNormalDest()->getSinglePredecessor()) + InsertPt = II.getNormalDest()->getFirstInsertionPt(); + else { + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. Therefore, split the critical edge and insert the store + // in the newly created basic block. + unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest()); + TerminatorInst *TI = &cast(I); + assert (isCriticalEdge(TI, SuccNum) && + "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(TI, SuccNum); + assert (BB && "Unable to split critical edge."); + InsertPt = BB->getFirstInsertionPt(); + } } for (; isa(InsertPt) || isa(InsertPt); ++InsertPt) -- cgit v1.1 From 7d80dad40741ecc2e45943f685bf539eeed2ba51 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 5 Feb 2013 18:46:41 +0000 Subject: Loop Vectorizer: Refactor code to compute vectorized memory instruction cost Introduce a helper class that computes the cost of memory access instructions. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174422 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 257 ++++++++++++++++++++--------- 1 file changed, 178 insertions(+), 79 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 6254273..7d696a7 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -560,6 +560,11 @@ public: /// \return information about the register usage of the loop. RegisterUsage calculateRegisterUsage(); + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + private: /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -571,11 +576,6 @@ private: /// width. Vector width of one means scalar. unsigned getInstructionCost(Instruction *I, unsigned VF); - /// A helper function for converting Scalar types to vector types. - /// If the incoming type is void, we return void. If the VF is 1, we return - /// the scalar type. 
-  static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
   /// Returns whether the instruction is a load or store and will be emitted
   /// as a vector operation.
   bool isConsecutiveLoadOrStore(Instruction *I);
@@ -594,6 +594,177 @@ private:
   DataLayout *DL;
 };

+/// A helper class to compute the cost of a memory operation (load or store).
+class MemoryCostComputation {
+public:
+  /// \brief This function computes the cost of a memory instruction, either of
+  /// a load or of a store.
+  /// \param Inst a pointer to a LoadInst or a StoreInst.
+  /// \param VF the vector factor to use.
+  /// \param TTI the target transform information used to obtain costs.
+  /// \param Legality the legality class used by this function to obtain the
+  /// access stride of the memory operation.
+  /// \returns the estimated cost of the memory instruction.
+  static unsigned computeCost(Value *Inst, unsigned VF,
+                              const TargetTransformInfo &TTI,
+                              LoopVectorizationLegality *Legality) {
+    if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+      return StoreCost(Store, VF, TTI, Legality).cost();
+
+    return LoadCost(cast<LoadInst>(Inst), VF, TTI, Legality).cost();
+  }
+
+private:
+  /// A helper class to compute the cost of a vectorized memory instruction.
+  /// It is subclassed by load and store cost computation classes that fill the
+  /// fields with values that require knowing about the concrete Load/StoreInst
+  /// class.
+  class MemoryOpCost {
+  public:
+    /// \return the cost of vectorizing the memory access instruction.
+    unsigned cost() {
+      if (VectorFactor == 1)
+        return TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, AddressSpace);
+
+      if ((Stride = Legality->isConsecutivePtr(PointerOperand)))
+        return costOfWideMemInst();
+
+      return costOfScalarizedMemInst();
+    }
+
+  protected:
+    /// The pointer operand of the memory instruction.
+    Value *PointerOperand;
+    /// The scalar type of the memory access.
+    Type *ScalarTy;
+    /// The vector type of the memory access.
+    Type *VectorTy;
+    /// The vector factor by which we vectorize.
+    unsigned VectorFactor;
+    /// The stride of the memory access.
+    int Stride;
+    /// The alignment of the memory operation.
+    unsigned Alignment;
+    /// The address space of the memory operation.
+    unsigned AddressSpace;
+    /// The opcode of the memory instruction.
+    unsigned Opcode;
+    /// Are we looking at a load or store instruction.
+    bool IsLoadInst;
+    const TargetTransformInfo &TTI;
+    LoopVectorizationLegality *Legality;

+    /// Constructs a helper class to compute the cost of a memory instruction.
+    /// \param VF the vector factor (the length of the vector).
+    /// \param TI the target transform information used by this class to obtain
+    /// costs.
+    /// \param L the legality class used by this class to obtain the access
+    /// stride of the memory operation.
+    MemoryOpCost(unsigned VF, const TargetTransformInfo &TI,
+                 LoopVectorizationLegality *L) :
+      VectorFactor(VF), TTI(TI), Legality(L) {
+    }
+
+  private:
+    /// \return the cost if the memory instruction is scalarized.
+    unsigned costOfScalarizedMemInst() {
+      unsigned Cost = 0;
+      Cost += costOfExtractFromPointerVector();
+      Cost += costOfExtractFromValueVector();
+      Cost += VectorFactor * TTI.getMemoryOpCost(Opcode, ScalarTy, Alignment,
+                                                 AddressSpace);
+      Cost += costOfInsertIntoValueVector();
+      return Cost;
+    }
+
+    /// \return the cost of extracting the pointers out of the pointer vector.
+    unsigned costOfExtractFromPointerVector() {
+      Type *PtrTy = getVectorizedPointerOperandType();
+      return costOfVectorInstForAllElems(Instruction::ExtractElement, PtrTy);
+    }
+
+    /// \return the cost for extracting values out of the value vector if the
+    /// memory instruction is a store and zero otherwise.
+    unsigned costOfExtractFromValueVector() {
+      if (IsLoadInst)
+        return 0;
+
+      return costOfVectorInstForAllElems(Instruction::ExtractElement, VectorTy);
+    }
+
+    /// \return the cost of inserting values into the value vector if the
+    /// memory instruction was a load and zero otherwise.
+    unsigned costOfInsertIntoValueVector() {
+      if (!IsLoadInst)
+        return 0;
+
+      return costOfVectorInstForAllElems(Instruction::InsertElement, VectorTy);
+    }
+
+    /// \return the cost of a vector memory instruction.
+    unsigned costOfWideMemInst() {
+      unsigned Cost = TTI.getMemoryOpCost(Opcode, VectorTy, Alignment,
+                                          AddressSpace);
+      // Reverse stride.
+      if (Stride < 0)
+        Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
+                                   0);
+      return Cost;
+    }
+
+    /// Helper function to compute the cost of one insert- or extractelement
+    /// instruction per vector element.
+    /// \param VecOpcode the vector instruction opcode (can be either
+    /// InsertElement or ExtractElement).
+    /// \param Ty the vector type the vector instruction operates on.
+    /// \return the cost of a vector instruction applied to each vector
+    /// element.
+    unsigned costOfVectorInstForAllElems(unsigned VecOpcode, Type *Ty) {
+      unsigned Cost = 0;
+      for (unsigned i = 0; i < VectorFactor; ++i)
+        Cost += TTI.getVectorInstrCost(VecOpcode, Ty, i);
+      return Cost;
+    }
+
+    /// \return a vectorized type for the pointer operand.
+    Type * getVectorizedPointerOperandType() {
+      Type *PointerOpTy = PointerOperand->getType();
+      return LoopVectorizationCostModel::ToVectorTy(PointerOpTy, VectorFactor);
+    }
+  };
+
+  /// Implementation of the abstract memory cost base class. Sets fields of
+  /// the base class whose values depend on the LoadInst.
+  class LoadCost : public MemoryOpCost {
+  public:
+    LoadCost(LoadInst *Load, unsigned VF, const TargetTransformInfo &TI,
+             LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) {
+      PointerOperand = Load->getPointerOperand();
+      ScalarTy = Load->getType();
+      VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF);
+      Alignment = Load->getAlignment();
+      AddressSpace = Load->getPointerAddressSpace();
+      Opcode = Load->getOpcode();
+      IsLoadInst = true;
+    }
+  };
+
+  /// Implementation of the abstract memory cost base class. Sets fields of
+  /// the base class whose values depend on the StoreInst.
+  class StoreCost : public MemoryOpCost {
+  public:
+    StoreCost(StoreInst *Store, unsigned VF, const TargetTransformInfo &TI,
+              LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) {
+      PointerOperand = Store->getPointerOperand();
+      ScalarTy = Store->getValueOperand()->getType();
+      VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF);
+      Alignment = Store->getAlignment();
+      AddressSpace = Store->getPointerAddressSpace();
+      Opcode = Store->getOpcode();
+      IsLoadInst = false;
+    }
+  };
+};
+
 /// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
  /// Pass identification, replacement for typeid
@@ -3097,83 +3268,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     VectorTy = ToVectorTy(ValTy, VF);
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
   }
+  case Instruction::Load:
   case Instruction::Store: {
-    StoreInst *SI = cast<StoreInst>(I);
-    Type *ValTy = SI->getValueOperand()->getType();
-    VectorTy = ToVectorTy(ValTy, VF);
-
-    if (VF == 1)
-      return TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
-                                 SI->getAlignment(),
-                                 SI->getPointerAddressSpace());
-
-    // Scalarized stores.
-    int Stride = Legal->isConsecutivePtr(SI->getPointerOperand());
-    bool Reverse = Stride < 0;
-    if (0 == Stride) {
-      unsigned Cost = 0;
-
-      // The cost of extracting from the value vector and pointer vector.
-      Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
-      for (unsigned i = 0; i < VF; ++i) {
-        Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
-                                       i);
-        Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
-      }
-
-      // The cost of the scalar stores.
-      Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
-                                       SI->getAlignment(),
-                                       SI->getPointerAddressSpace());
-      return Cost;
-    }
-
-    // Wide stores.
-    unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
-                                        SI->getAlignment(),
-                                        SI->getPointerAddressSpace());
-    if (Reverse)
-      Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                 VectorTy, 0);
-    return Cost;
+    return MemoryCostComputation::computeCost(I, VF, TTI, Legal);
   }
-  case Instruction::Load: {
-    LoadInst *LI = cast<LoadInst>(I);
-    if (VF == 1)
-      return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
-                                 LI->getPointerAddressSpace());
-
-    // Scalarized loads.
-    int Stride = Legal->isConsecutivePtr(LI->getPointerOperand());
-    bool Reverse = Stride < 0;
-    if (0 == Stride) {
-      unsigned Cost = 0;
-      Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
-
-      // The cost of extracting from the pointer vector.
-      for (unsigned i = 0; i < VF; ++i)
-        Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
-
-      // The cost of inserting data to the result vector.
-      for (unsigned i = 0; i < VF; ++i)
-        Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy, i);
-
-      // The cost of the scalar stores.
-      Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), RetTy->getScalarType(),
-                                       LI->getAlignment(),
-                                       LI->getPointerAddressSpace());
-      return Cost;
-    }
-
-    // Wide loads.
-    unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
-                                        LI->getAlignment(),
-                                        LI->getPointerAddressSpace());
-    if (Reverse)
-      Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
-    return Cost;
-  }
   case Instruction::ZExt:
   case Instruction::SExt:
   case Instruction::FPToUI:
-- cgit v1.1

From faf601ee936a440027447fa11ef400cf53bc1acf Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Tue, 5 Feb 2013 19:04:36 +0000
Subject: ConstantFolding: Fix a crash when encountering a truncating inttoptr.

This was introduced in r173293.
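For context on the failure mode: IsConstantOffsetFromGlobal reports offsets as
pointer-width APInts, but a truncating ptrtoint makes the subtraction's result
type narrower than a pointer, and constructing a ConstantInt from an APInt of
the wrong width asserts. Roughly, as a hand-written illustration rather than
the actual fabricated test case:

    // 64-bit target, i32 operands: Offs1 and Offs2 are 64 bits wide.
    APInt Diff = Offs1 - Offs2;             // width 64
    ConstantInt::get(Op0->getType(), Diff); // asserts: width 64 vs. i32
    // The fix below resizes the offsets to the operand width first:
    ConstantInt::get(Op0->getType(), Diff.zextOrTrunc(32));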
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174424 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 400ce72..91424b2 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -545,14 +545,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // constant. This happens frequently when iterating over a global array. if (Opc == Instruction::Sub && TD) { GlobalValue *GV1, *GV2; - APInt Offs1(TD->getPointerSizeInBits(), 0), - Offs2(TD->getPointerSizeInBits(), 0); + unsigned PtrSize = TD->getPointerSizeInBits(); + unsigned OpSize = TD->getTypeSizeInBits(Op0->getType()); + APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && GV1 == GV2) { // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. - return ConstantInt::get(Op0->getType(), Offs1-Offs2); + // PtrToInt may change the bitwidth so we have convert to the right size + // first. + return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - + Offs2.zextOrTrunc(OpSize)); } } -- cgit v1.1 From 1d3d2c57f55e04197efe15b293c783fe879c2551 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 5 Feb 2013 19:20:45 +0000 Subject: Hexagon: Use TFR_cond with cmpb.[eq,gt,gtu] to handle zext( set[ne,eq,gt,ugt] (...) ) type of dag patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174429 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 9 +- lib/Target/Hexagon/HexagonInstrInfoV4.td | 206 +++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 381032b..6443cb2 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -97,7 +97,14 @@ public: SDNode *SelectAdd(SDNode *N); bool isConstExtProfitable(SDNode *N) const; - // Include the pieces autogenerated from the target description. +// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range +// [1..128], used in cmpb.gtu instructions. +inline SDValue XformU7ToU7M1Imm(signed Imm) { + assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i8); +} + +// Include the pieces autogenerated from the target description. #include "HexagonGenDAGISel.inc" }; } // end anonymous namespace diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 53f1189..4e37b99 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3812,6 +3812,212 @@ def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), u7ExtPred:$src2))]>, Requires<[HasV4T]>, ImmRegRel; +// SDNode for converting immediate C to C-1. 
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+   int32_t imm = N->getSExtValue();
+   return XformU7ToU7M1Imm(imm);
+}]>;
+
+// For the sequence
+// zext( seteq ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
+                    u8ExtPred:$u8)))),
+  (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+                    (u8ExtPred:$u8))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
+                    u8ExtPred:$u8)))),
+  (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+                    (u8ExtPred:$u8))),
+                   0, 1))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( seteq (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
+                    (i32 (and (i32 IntRegs:$Rs), 255)))))),
+  (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+                    (i32 IntRegs:$Rt))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
+                    (i32 (and (i32 IntRegs:$Rs), 255)))))),
+  (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+                    (i32 IntRegs:$Rt))),
+                   0, 1))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
+                    u8ExtPred:$u8)))),
+  (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+                    (u8ExtPred:$u8))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 254), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
+                    u8ExtPred:$u8)))),
+  (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+                    (u8ExtPred:$u8))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+  (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+                    (i32 IntRegs:$Rs))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setlt ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+  (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+                    (i32 IntRegs:$Rs))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+  (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+                    (i32 IntRegs:$Rt))),
+                   1, 0))>,
+  Requires<[HasV4T]>;
+
+// This pattern interferes with coremark performance, not implementing at this
+// time.
+// For the sequence +// zext( setgt ( Rs, Rt)) +// Generate +// Pd=cmp.gt(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 + +// For the sequence +// zext( setuge ( Rs, Rt)) +// Generate +// Pd=cmp.ltu(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) +def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setge ( Rs, Rt)) +// Generate +// Pd=cmp.lt(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) +def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setule ( Rs, Rt)) +// Generate +// Pd=cmp.gtu(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setle ( Rs, Rt)) +// Generate +// Pd=cmp.gt(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setult ( and(Rs, 255), u8)) +// Use the isdigit transformation below + +// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)' +// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned which allows us to eliminate a zero test after +// biasing the expression by 48. We are depending on the representation of +// the unsigned types, and semantics. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM +// +// For the C code: +// retval = ((c>='0') & (c<='9')) ? 1 : 0; +// The code is transformed upstream of llvm into +// retval = (c-48) < 10 ? 1 : 0; +let AddedComplexity = 139 in +def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7StrictPosImmPred:$src2)))), + (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1), + (DEC_CONST_BYTE u7StrictPosImmPred:$src2))), + 0, 1))>, + Requires<[HasV4T]>; + // Pd=cmpb.gtu(Rs,Rt) let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "reg" in -- cgit v1.1 From 1018fa256d5b5a134c06dac76d1d285e04562187 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 5 Feb 2013 19:21:56 +0000 Subject: InstCombine: Harden code to work with vectors of pointers and simplify it a bit. Found by running instcombine on a fabricated test case for the constant folder. 
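The rewritten ptrtoint path builds the intermediate intptr type as a vector
when the source is a vector of pointers, then lets one integer cast cover both
the trunc and zext cases that were previously written out separately. Annotated
with a worked 64-bit example (a hypothetical <2 x i8*> to <2 x i32> cast, not
the fabricated test itself):

    Type *Ty = TD->getIntPtrType(CI.getContext()); // i64 on a 64-bit target
    if (CI.getType()->isVectorTy())                // result is <2 x i32>
      Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); // <2 x i64>
    Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
    return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);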
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 653f97a..230dc28 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1395,17 +1395,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (TD) { - if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new TruncInst(P, CI.getType()); - } - if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new ZExtInst(P, CI.getType()); - } + if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) { + Type *Ty = TD->getIntPtrType(CI.getContext()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty); + return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false); } return commonPointerCastTransforms(CI); -- cgit v1.1 From b48783b09127136c98cc6b0399be757728504e56 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 5 Feb 2013 19:24:39 +0000 Subject: Reapply r174343, with a fix for a scary DAG combine bug where it failed to differentiate between the alignment of the base point of a load, and the overall alignment of the load. This caused infinite loops in DAG combine with the original application of this patch. ORIGINAL COMMIT LOG: When the target-independent DAGCombiner inferred a higher alignment for a load, it would replace the load with one with the higher alignment. However, it did not place the new load in the worklist, which prevented later DAG combines in the same phase (for example, target-specific combines) from ever seeing it. This patch corrects that oversight, and updates some tests whose output changed due to slightly different DAGCombine outputs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174431 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 79ec227..d694bc7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7199,12 +7199,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. 
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + if (Align > LD->getMemOperand()->getBaseAlignment()) { + SDValue NewLoad = + DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + } } } -- cgit v1.1 From da6bf1d8bbf8e787f7c0edffbad40531dd520446 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 5 Feb 2013 19:32:18 +0000 Subject: Removed explicit inline as per the LLVM style guide. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174432 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 3bfb003..e6cd1a7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -451,31 +451,31 @@ namespace { PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} - inline void SetKnownPositiveRefCount() { + void SetKnownPositiveRefCount() { KnownPositiveRefCount = true; } - inline void ClearRefCount() { + void ClearRefCount() { KnownPositiveRefCount = false; } - inline bool IsKnownIncremented() const { + bool IsKnownIncremented() const { return KnownPositiveRefCount; } - inline void SetSeq(Sequence NewSeq) { + void SetSeq(Sequence NewSeq) { Seq = NewSeq; } - inline Sequence GetSeq() const { + Sequence GetSeq() const { return Seq; } - inline void ClearSequenceProgress() { + void ClearSequenceProgress() { ResetSequenceProgress(S_None); } - inline void ResetSequenceProgress(Sequence NewSeq) { + void ResetSequenceProgress(Sequence NewSeq) { Seq = NewSeq; Partial = false; RRI.clear(); -- cgit v1.1 From 39b5f12dd68430c4794b1d24af0fd204c82bc12f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 5 Feb 2013 20:22:40 +0000 Subject: InstCombine: Fix and simplify the inttoptr side too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174438 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 230dc28..98fd05a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1322,19 +1322,14 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. 
- if (TD) { - if (CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new IntToPtrInst(P, CI.getType()); - } - if (CI.getOperand(0)->getType()->getScalarSizeInBits() < - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new IntToPtrInst(P, CI.getType()); - } + if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() != + TD->getPointerSizeInBits()) { + Type *Ty = TD->getIntPtrType(CI.getContext()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); } if (Instruction *I = commonCastTransforms(CI)) -- cgit v1.1 From baabdecbb9bf5b32fa81b1e2830ab13076d549f1 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 5 Feb 2013 21:18:11 +0000 Subject: [mips] Do not use function CC_MipsN_VarArg unless the function being analyzed is a vararg function. The original code was examining flag OutputArg::IsFixed to determine whether CC_MipsN_VarArg or CC_MipsN should be called. This is not correct, since this flag is often set to false when the function being analyzed is a non-variadic function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ff0064e..2ff369c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3793,7 +3793,7 @@ MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg, IntArgRegs = Mips64IntRegs; ShadowRegs = Mips64DPRegs; FixedFn = CC_MipsN; - VarFn = CC_MipsN_VarArg; + VarFn = IsVarArg ? CC_MipsN_VarArg : CC_MipsN; } if (CallConv == CallingConv::Fast) { -- cgit v1.1 From 43213cf1ac05b4198fcf9fa85d7da85477daafd1 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Tue, 5 Feb 2013 21:52:47 +0000 Subject: Dwarf: support for LTO where a single object file can have multiple line tables We generate one line table for each compilation unit in the object file. Reviewed by Eric and Kevin. rdar://problem/13067005 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 21 ++++++++++++-- lib/MC/MCContext.cpp | 2 +- lib/MC/MCDwarf.cpp | 53 +++++++++++++++++++++++++++-------- 3 files changed, 61 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 967c149..dcaab31 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -662,13 +662,21 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { // 2.17.1 requires that we use DW_AT_low_pc for a single entry point // into an entity. We're using 0 (or a NULL label) for this. NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); + + // Define start line table label for each Compile Unit. 
+ MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", + NewCU->getUniqueID()); + Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, + NewCU->getUniqueID()); + // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("section_line")); + LineTableStartSym); else - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + LineTableStartSym, Asm->GetTempSymbol("section_line")); if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -1399,6 +1407,13 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (LScopes.empty()) return; identifyScopeMarkers(); + // Set DwarfCompileUnitID in MCContext to the Compile Unit this function + // belongs to. + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + assert(TheCU && "Unable to find compile unit!"); + Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. @@ -1583,6 +1598,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); + // Set DwarfCompileUnitID in MCContext to default value. + Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); SmallPtrSet ProcessedVars; collectVariableInfo(MF, ProcessedVars); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index aa52b49..a074003 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -40,7 +40,7 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri, CompilationDir(llvm::sys::Path::GetCurrentDirectory().str()), CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0), - AllowTemporaryLabels(true), AutoReset(DoAutoReset) { + AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) { MachOUniquingMap = 0; ELFUniquingMap = 0; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 3cf47bc..5465af6 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -101,7 +101,8 @@ void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) { } // Add the line entry to this section's entries. - LineSection->addLineEntry(LineEntry); + LineSection->addLineEntry(LineEntry, + MCOS->getContext().getDwarfCompileUnitID()); } // @@ -131,7 +132,12 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS, // static inline void EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section, - const MCLineSection *LineSection) { + const MCLineSection *LineSection, + unsigned CUID) { + // This LineSection does not contain any LineEntry for the given Compile Unit. + if (!LineSection->containEntriesForID(CUID)) + return; + unsigned FileNum = 1; unsigned LastLine = 1; unsigned Column = 0; @@ -141,8 +147,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // Loop through each MCLineEntry and encode the dwarf line number table. 
for (MCLineSection::const_iterator - it = LineSection->getMCLineEntries()->begin(), - ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) { + it = LineSection->getMCLineEntries(CUID).begin(), + ie = LineSection->getMCLineEntries(CUID).end(); it != ie; ++it) { if (FileNum != it->getFileNum()) { FileNum = it->getFileNum(); @@ -215,9 +221,36 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) { // Switch to the section where the table will be emitted into. MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); - // Create a symbol at the beginning of this section. - MCSymbol *LineStartSym = context.CreateTempSymbol(); - // Set the value of the symbol, as we are at the start of the section. + const DenseMap &MCLineTableSymbols = + MCOS->getContext().getMCLineTableSymbols(); + // CUID and MCLineTableSymbols are set in DwarfDebug, when DwarfDebug does + // not exist, CUID will be 0 and MCLineTableSymbols will be empty. + // Handle Compile Unit 0, the line table start symbol is the section symbol. + const MCSymbol *LineStartSym = EmitCU(MCOS, 0); + // Handle the rest of the Compile Units. + for (unsigned Is = 1, Ie = MCLineTableSymbols.size(); Is < Ie; Is++) + EmitCU(MCOS, Is); + + // Now delete the MCLineSections that were created in MCLineEntry::Make() + // and used to emit the line table. + const DenseMap &MCLineSections = + MCOS->getContext().getMCLineSections(); + for (DenseMap::const_iterator it = + MCLineSections.begin(), ie = MCLineSections.end(); it != ie; + ++it) + delete it->second; + + return LineStartSym; +} + +const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { + MCContext &context = MCOS->getContext(); + + // Create a symbol at the beginning of the line table. + MCSymbol *LineStartSym = MCOS->getContext().getMCLineTableSymbol(CUID); + if (!LineStartSym) + LineStartSym = context.CreateTempSymbol(); + // Set the value of the symbol, as we are at the start of the line table. MCOS->EmitLabel(LineStartSym); // Create a symbol for the end of the section (to be set when we get there). @@ -301,11 +334,7 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) { ++it) { const MCSection *Sec = *it; const MCLineSection *Line = MCLineSections.lookup(Sec); - EmitDwarfLineTable(MCOS, Sec, Line); - - // Now delete the MCLineSections that were created in MCLineEntry::Make() - // and used to emit the line table. - delete Line; + EmitDwarfLineTable(MCOS, Sec, Line, CUID); } if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines() -- cgit v1.1 From 2a1b60d791522d73be91d4281c90d25bd5e3d117 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 5 Feb 2013 21:53:29 +0000 Subject: Make sure the correct opcodes are used to SUB and ADD the stack pointer in function prologs/epilogs. The opcodes should depend on the data model (LP64 vs. ILP32) rather than the architecture bit-ness. 
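Concretely, an x32 target (the ILP32 data model on the 64-bit ISA) reports
is64Bit() as true but isTarget64BitLP64() as false, so stack adjustments must
use the 32-bit opcodes on ESP. A worked example of the opcode selection, with
an illustrative 40-byte adjustment:

    // x32: Is64Bit = true, IsLP64 = false
    unsigned Opc = getSUBriOpcode(/*isLP64=*/false, 40); // X86::SUB32ri8
    // Plain x86-64 (LP64): both flags are true
    Opc = getSUBriOpcode(/*isLP64=*/true, 40);           // X86::SUB64ri8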
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 420aeb8..0901961 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -55,8 +55,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MMI.callsUnwindInit() || MMI.callsEHReturn()); } -static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { +static unsigned getSUBriOpcode(unsigned isLP64, int64_t Imm) { + if (isLP64) { if (isInt<8>(Imm)) return X86::SUB64ri8; return X86::SUB64ri32; @@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { } } -static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { +static unsigned getADDriOpcode(unsigned isLP64, int64_t Imm) { + if (isLP64) { if (isInt<8>(Imm)) return X86::ADD64ri8; return X86::ADD64ri32; @@ -145,7 +145,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, int64_t NumBytes, - bool Is64Bit, bool UseLEA, + bool Is64Bit, bool IsLP64, bool UseLEA, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; @@ -154,8 +154,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, Opc = getLEArOpcode(Is64Bit); else Opc = isSub - ? getSUBriOpcode(Is64Bit, Offset) - : getADDriOpcode(Is64Bit, Offset); + ? getSUBriOpcode(IsLP64, Offset) + : getADDriOpcode(IsLP64, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -660,6 +660,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); bool Is64Bit = STI.is64Bit(); + bool IsLP64 = STI.isTarget64BitLP64(); bool IsWin64 = STI.isTargetWin64(); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); @@ -711,7 +712,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), + TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta) @@ -927,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // MSVC x64's __chkstk needs to adjust %rsp. // FIXME: %rax preserves the offset and should be available. if (isSPUpdateNeeded) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); if (isEAXAlive) { @@ -939,7 +940,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MBB.insert(MBBI, MI); } } else if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); // If we need a base pointer, set it up here. 
It's whatever the value
@@ -996,6 +997,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   unsigned RetOpcode = MBBI->getOpcode();
   DebugLoc DL = MBBI->getDebugLoc();
   bool Is64Bit = STI.is64Bit();
+  bool IsLP64 = STI.isTarget64BitLP64();
   bool UseLEA = STI.useLeaForSP();
   unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
@@ -1091,7 +1093,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     }
   } else if (NumBytes) {
     // Adjust stack pointer back: ESP += numbytes.
-    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
+    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+                 TII, *RegInfo);
   }

   // We're returning from function via eh_return.
@@ -1126,7 +1129,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     if (Offset) {
       // Check for possible merge with preceding ADD instruction.
       Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
-      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
+      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+                   UseLEA, TII, *RegInfo);
     }

     // Jump to label or value in register.
@@ -1169,7 +1173,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,

     // Check for possible merge with preceding ADD instruction.
     delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
-    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
+    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+                 *RegInfo);
   }
 }
-- cgit v1.1

From 8c74ecfbddabe89e150abff4fdff0a27108874b9 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Tue, 5 Feb 2013 22:37:24 +0000
Subject: Convert to storing the attribute's internals as enums, integers, and
 strings.

The stuff we're handling is all enums (Attribute::AttrKind), integers and
strings. Don't convert them to Constants, which is an unnecessary step here.
The rest of the changes are mostly mechanical.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174456 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/IR/AttributeImpl.h | 122 +++++++++++++++++++----
 lib/IR/Attributes.cpp  | 256 +++++++++++++++++++++++++++++--------------------
 2 files changed, 251 insertions(+), 127 deletions(-)

(limited to 'lib')

diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index bf87562..5f9e3e7 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -27,47 +27,127 @@ class LLVMContext;

 //===----------------------------------------------------------------------===//
 /// \class
+/// \brief A set of classes that contain the kind and (optional) value of the
+/// attribute object. There are three main categories: enum attribute entries,
+/// represented by Attribute::AttrKind; alignment attribute entries; and string
+/// attribute entries, which are for target-dependent attributes.
+class AttributeEntry { + unsigned char KindID; +protected: + enum AttrEntryKind { + EnumAttrEntry, + AlignAttrEntry, + StringAttrEntry + }; +public: + AttributeEntry(AttrEntryKind Kind) + : KindID(Kind) {} + virtual ~AttributeEntry() {} + + unsigned getKindID() const { return KindID; } + + static inline bool classof(const AttributeEntry *) { return true; } +}; + +class EnumAttributeEntry : public AttributeEntry { + Attribute::AttrKind Kind; +public: + EnumAttributeEntry(Attribute::AttrKind Kind) + : AttributeEntry(EnumAttrEntry), Kind(Kind) {} + + Attribute::AttrKind getEnumKind() const { return Kind; } + + static inline bool classof(const AttributeEntry *AE) { + return AE->getKindID() == EnumAttrEntry; + } + static inline bool classof(const EnumAttributeEntry *) { return true; } +}; + +class AlignAttributeEntry : public AttributeEntry { + Attribute::AttrKind Kind; + unsigned Align; +public: + AlignAttributeEntry(Attribute::AttrKind Kind, unsigned Align) + : AttributeEntry(AlignAttrEntry), Kind(Kind), Align(Align) {} + + Attribute::AttrKind getEnumKind() const { return Kind; } + unsigned getAlignment() const { return Align; } + + static inline bool classof(const AttributeEntry *AE) { + return AE->getKindID() == AlignAttrEntry; + } + static inline bool classof(const AlignAttributeEntry *) { return true; } +}; + +class StringAttributeEntry : public AttributeEntry { + std::string Kind; + std::string Val; +public: + StringAttributeEntry(StringRef Kind, StringRef Val = StringRef()) + : AttributeEntry(StringAttrEntry), Kind(Kind), Val(Val) {} + + StringRef getStringKind() const { return Kind; } + StringRef getStringValue() const { return Val; } + + static inline bool classof(const AttributeEntry *AE) { + return AE->getKindID() == StringAttrEntry; + } + static inline bool classof(const StringAttributeEntry *) { return true; } +}; + +//===----------------------------------------------------------------------===// +/// \class /// \brief This class represents a single, uniqued attribute. That attribute /// could be a single enum, a tuple, or a string. class AttributeImpl : public FoldingSetNode { - LLVMContext &Context; ///< Global context for uniquing objects - Constant *Kind; ///< Kind of attribute: enum or string - Constant *Values; ///< Values associated with the attribute + LLVMContext &Context; ///< Global context for uniquing objects + Constant *Kind; ///< Kind of attribute: enum or string + + AttributeEntry *Entry; ///< Holds the kind and value of the attribute // AttributesImpl is uniqued, these should not be publicly available. 
void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; public: - AttributeImpl(LLVMContext &C, Constant *Kind, Constant *Values = 0) - : Context(C), Kind(Kind), Values(Values) {} + AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind); + AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind, unsigned Align); + AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val = StringRef()); + ~AttributeImpl(); LLVMContext &getContext() { return Context; } - bool hasAttribute(Attribute::AttrKind A) const; + bool isEnumAttribute() const; + bool isAlignAttribute() const; + bool isStringAttribute() const; - Constant *getAttributeKind() const { return Kind; } - Constant *getAttributeValues() const { return Values; } - - uint64_t getAlignment() const; - uint64_t getStackAlignment() const; + bool hasAttribute(Attribute::AttrKind A) const; + bool hasAttribute(StringRef Kind) const; - /// \brief Equality and non-equality comparison operators. - bool operator==(Attribute::AttrKind Kind) const; - bool operator!=(Attribute::AttrKind Kind) const; + Attribute::AttrKind getKindAsEnum() const; + uint64_t getValueAsInt() const; - bool operator==(StringRef Kind) const; - bool operator!=(StringRef Kind) const; + StringRef getKindAsString() const; + StringRef getValueAsString() const; /// \brief Used when sorting the attributes. bool operator<(const AttributeImpl &AI) const; void Profile(FoldingSetNodeID &ID) const { - Profile(ID, Kind, Values); + if (isEnumAttribute()) + Profile(ID, getKindAsEnum(), 0); + else if (isAlignAttribute()) + Profile(ID, getKindAsEnum(), getValueAsInt()); + else + Profile(ID, getKindAsString(), getValueAsString()); + } + static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind, + uint64_t Val) { + ID.AddInteger(Kind); + if (Val) ID.AddInteger(Val); } - static void Profile(FoldingSetNodeID &ID, Constant *Kind, Constant *Values) { - ID.AddPointer(Kind); - if (Values) - ID.AddPointer(Values); + static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) { + ID.AddString(Kind); + ID.AddString(Values); } // FIXME: Remove this! diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8a0551c..d61bd09 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -30,11 +30,12 @@ using namespace llvm; // Attribute Construction Methods //===----------------------------------------------------------------------===// -Attribute Attribute::get(LLVMContext &Context, Constant *Kind, Constant *Val) { +Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, + uint64_t Val) { LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; - ID.AddPointer(Kind); - if (Val) ID.AddPointer(Val); + ID.AddInteger(Kind); + if (Val) ID.AddInteger(Val); void *InsertPoint; AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); @@ -42,7 +43,9 @@ Attribute Attribute::get(LLVMContext &Context, Constant *Kind, Constant *Val) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributeImpl(Context, Kind, Val); + PA = !Val ? 
+ new AttributeImpl(Context, Kind) : + new AttributeImpl(Context, Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -50,47 +53,88 @@ Attribute Attribute::get(LLVMContext &Context, Constant *Kind, Constant *Val) { return Attribute(PA); } -Attribute Attribute::get(LLVMContext &Context, AttrKind Kind, Constant *Val) { - ConstantInt *KindVal = ConstantInt::get(Type::getInt64Ty(Context), Kind); - return get(Context, KindVal, Val); +Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) { + LLVMContextImpl *pImpl = Context.pImpl; + FoldingSetNodeID ID; + ID.AddString(Kind); + if (!Val.empty()) ID.AddString(Val); + + void *InsertPoint; + AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); + + if (!PA) { + // If we didn't find any existing attributes of the same shape then create a + // new one and insert it. + PA = new AttributeImpl(Context, Kind, Val); + pImpl->AttrsSet.InsertNode(PA, InsertPoint); + } + + // Return the Attribute that we found or created. + return Attribute(PA); } Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x40000000 && "Alignment too large."); - return get(Context, Alignment, - ConstantInt::get(Type::getInt64Ty(Context), Align)); + return get(Context, Alignment, Align); } Attribute Attribute::getWithStackAlignment(LLVMContext &Context, uint64_t Align) { assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x100 && "Alignment too large."); - return get(Context, StackAlignment, - ConstantInt::get(Type::getInt64Ty(Context), Align)); + return get(Context, StackAlignment, Align); } //===----------------------------------------------------------------------===// // Attribute Accessor Methods //===----------------------------------------------------------------------===// -bool Attribute::hasAttribute(AttrKind Val) const { - return pImpl && pImpl->hasAttribute(Val); +bool Attribute::isEnumAttribute() const { + return pImpl && pImpl->isEnumAttribute(); } -Constant *Attribute::getAttributeKind() const { - return pImpl ? pImpl->getAttributeKind() : 0; +bool Attribute::isAlignAttribute() const { + return pImpl && pImpl->isAlignAttribute(); } -Constant *Attribute::getAttributeValues() const { - return pImpl ? pImpl->getAttributeValues() : 0; +bool Attribute::isStringAttribute() const { + return pImpl && pImpl->isStringAttribute(); +} + +Attribute::AttrKind Attribute::getKindAsEnum() const { + assert((isEnumAttribute() || isAlignAttribute()) && + "Invalid attribute type to get the kind as an enum!"); + return pImpl ? pImpl->getKindAsEnum() : None; +} + +uint64_t Attribute::getValueAsInt() const { + assert(isAlignAttribute() && + "Expected the attribute to be an alignment attribute!"); + return pImpl ? pImpl->getValueAsInt() : 0; +} + +StringRef Attribute::getKindAsString() const { + assert(isStringAttribute() && + "Invalid attribute type to get the kind as a string!"); + return pImpl ? pImpl->getKindAsString() : StringRef(); +} + +StringRef Attribute::getValueAsString() const { + assert(isStringAttribute() && + "Invalid attribute type to get the value as a string!"); + return pImpl ? pImpl->getValueAsString() : StringRef(); +} + +bool Attribute::hasAttribute(AttrKind Val) const { + return (pImpl && pImpl->hasAttribute(Val)) || (!pImpl && Val == None); } /// This returns the alignment field of an attribute as a byte alignment value. 
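// A hedged sketch of how the reworked accessors are meant to be used
// (illustrative only, not code from this patch; every name is one the
// patch itself introduces):
//   void printAttr(raw_ostream &OS, Attribute A) {
//     if (A.isStringAttribute())
//       OS << A.getKindAsString() << " = " << A.getValueAsString();
//     else if (A.isAlignAttribute())
//       OS << A.getKindAsEnum() << " = " << A.getValueAsInt();
//     else
//       OS << A.getKindAsEnum();
//   }
// No ConstantInt/ConstantDataArray unwrapping is needed anymore.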
unsigned Attribute::getAlignment() const { assert(hasAttribute(Attribute::Alignment) && "Trying to get alignment from non-alignment attribute!"); - return pImpl->getAlignment(); + return pImpl->getValueAsInt(); } /// This returns the stack alignment field of an attribute as a byte alignment @@ -98,7 +142,7 @@ unsigned Attribute::getAlignment() const { unsigned Attribute::getStackAlignment() const { assert(hasAttribute(Attribute::StackAlignment) && "Trying to get alignment from non-alignment attribute!"); - return pImpl->getStackAlignment(); + return pImpl->getValueAsInt(); } std::string Attribute::getAsString() const { @@ -166,17 +210,17 @@ std::string Attribute::getAsString() const { // align=4 // alignstack=8 // - if (hasAttribute(Attribute::StackAlignment)) { + if (hasAttribute(Attribute::Alignment)) { std::string Result; - Result += "alignstack("; - Result += utostr(getStackAlignment()); - Result += ")"; + Result += "align "; + Result += utostr(getValueAsInt()); return Result; } - if (hasAttribute(Attribute::Alignment)) { + if (hasAttribute(Attribute::StackAlignment)) { std::string Result; - Result += "align "; - Result += utostr(getAlignment()); + Result += "alignstack("; + Result += utostr(getValueAsInt()); + Result += ")"; return Result; } @@ -186,33 +230,21 @@ std::string Attribute::getAsString() const { // "kind" = "value" // "kind" = ( "value1" "value2" "value3" ) // - if (ConstantDataArray *CDA = - dyn_cast(pImpl->getAttributeKind())) { + if (isStringAttribute()) { std::string Result; - Result += '\"' + CDA->getAsString().str() + '"'; + Result += '\"' + getKindAsString().str() + '"'; - Constant *Vals = pImpl->getAttributeValues(); - if (!Vals) return Result; - - // FIXME: This should support more than just ConstantDataArrays. Also, - // support a vector of attribute values. 
+ StringRef Val = pImpl->getValueAsString(); + if (Val.empty()) return Result; Result += " = "; - Result += '\"' + cast(Vals)->getAsString().str() + '"'; - + Result += '\"' + Val.str() + '"'; return Result; } llvm_unreachable("Unknown attribute"); } -bool Attribute::operator==(AttrKind K) const { - return (pImpl && *pImpl == K) || (!pImpl && K == None); -} -bool Attribute::operator!=(AttrKind K) const { - return !(*this == K); -} - bool Attribute::operator<(Attribute A) const { if (!pImpl && !A.pImpl) return false; if (!pImpl) return true; @@ -224,68 +256,86 @@ bool Attribute::operator<(Attribute A) const { // AttributeImpl Definition //===----------------------------------------------------------------------===// -bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { - if (ConstantInt *CI = dyn_cast(Kind)) - return CI->getZExtValue() == A; - return false; +AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind) + : Context(C), Entry(new EnumAttributeEntry(Kind)) {} + +AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind, + unsigned Align) + : Context(C) { + assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) && + "Wrong kind for alignment attribute!"); + Entry = new AlignAttributeEntry(Kind, Align); } -uint64_t AttributeImpl::getAlignment() const { - assert(hasAttribute(Attribute::Alignment) && - "Trying to retrieve the alignment from a non-alignment attr!"); - return cast(Values)->getZExtValue(); +AttributeImpl::AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val) + : Context(C), Entry(new StringAttributeEntry(Kind, Val)) {} + +AttributeImpl::~AttributeImpl() { + delete Entry; } -uint64_t AttributeImpl::getStackAlignment() const { - assert(hasAttribute(Attribute::StackAlignment) && - "Trying to retrieve the stack alignment from a non-alignment attr!"); - return cast(Values)->getZExtValue(); +bool AttributeImpl::isEnumAttribute() const { + return isa(Entry); } -bool AttributeImpl::operator==(Attribute::AttrKind kind) const { - if (ConstantInt *CI = dyn_cast(Kind)) - return CI->getZExtValue() == kind; - return false; +bool AttributeImpl::isAlignAttribute() const { + return isa(Entry); } -bool AttributeImpl::operator!=(Attribute::AttrKind kind) const { - return !(*this == kind); + +bool AttributeImpl::isStringAttribute() const { + return isa(Entry); } -bool AttributeImpl::operator==(StringRef kind) const { - if (ConstantDataArray *CDA = dyn_cast(Kind)) - if (CDA->isString()) - return CDA->getAsString() == kind; - return false; +bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { + if (isStringAttribute()) return false; + return getKindAsEnum() == A; } -bool AttributeImpl::operator!=(StringRef kind) const { - return !(*this == kind); +bool AttributeImpl::hasAttribute(StringRef Kind) const { + if (!isStringAttribute()) return false; + return getKindAsString() == Kind; } -bool AttributeImpl::operator<(const AttributeImpl &AI) const { - // This sorts the attributes with Attribute::AttrKinds coming first (sorted - // relative to their enum value) and then strings. 
+Attribute::AttrKind AttributeImpl::getKindAsEnum() const { + if (EnumAttributeEntry *E = dyn_cast(Entry)) + return E->getEnumKind(); + return cast(Entry)->getEnumKind(); +} - if (!Kind && !AI.Kind) return false; - if (!Kind && AI.Kind) return true; - if (Kind && !AI.Kind) return false; +uint64_t AttributeImpl::getValueAsInt() const { + return cast(Entry)->getAlignment(); +} - ConstantInt *ThisCI = dyn_cast(Kind); - ConstantInt *ThatCI = dyn_cast(AI.Kind); +StringRef AttributeImpl::getKindAsString() const { + return cast(Entry)->getStringKind(); +} - ConstantDataArray *ThisCDA = dyn_cast(Kind); - ConstantDataArray *ThatCDA = dyn_cast(AI.Kind); +StringRef AttributeImpl::getValueAsString() const { + return cast(Entry)->getStringValue(); +} - if (ThisCI && ThatCI) - return ThisCI->getZExtValue() < ThatCI->getZExtValue(); +bool AttributeImpl::operator<(const AttributeImpl &AI) const { + // This sorts the attributes with Attribute::AttrKinds coming first (sorted + // relative to their enum value) and then strings. + if (isEnumAttribute()) + if (AI.isAlignAttribute() || AI.isEnumAttribute()) + return getKindAsEnum() < AI.getKindAsEnum(); - if (ThisCI && ThatCDA) - return true; + if (isAlignAttribute()) { + if (!AI.isStringAttribute() && getKindAsEnum() < AI.getKindAsEnum()) + return true; + if (AI.isAlignAttribute()) + return getValueAsInt() < AI.getValueAsInt(); + } - if (ThisCDA && ThatCI) - return false; + if (isStringAttribute()) { + if (!AI.isStringAttribute()) return false; + if (getKindAsString() < AI.getKindAsString()) return true; + if (getKindAsString() == AI.getKindAsString()) + return getValueAsString() < AI.getValueAsString(); + } - return ThisCDA->getAsString() < ThatCDA->getAsString(); + return false; } uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { @@ -413,15 +463,14 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const { for (AttributeSetNode::const_iterator II = ASN->begin(), IE = ASN->end(); II != IE; ++II) { Attribute Attr = *II; - ConstantInt *Kind = cast(Attr.getAttributeKind()); - Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue()); + Attribute::AttrKind Kind = Attr.getKindAsEnum(); - if (KindVal == Attribute::Alignment) + if (Kind == Attribute::Alignment) Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16; - else if (KindVal == Attribute::StackAlignment) + else if (Kind == Attribute::StackAlignment) Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26; else - Mask |= AttributeImpl::getAttrMask(KindVal); + Mask |= AttributeImpl::getAttrMask(Kind); } return Mask; @@ -465,7 +514,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { assert((!i || Attrs[i-1].first <= Attrs[i].first) && "Misordered Attributes list!"); - assert(Attrs[i].second != Attribute::None && + assert(!Attrs[i].second.hasAttribute(Attribute::None) && "Pointless attribute!"); } #endif @@ -815,13 +864,13 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { - ConstantInt *Kind = cast(Attr.getAttributeKind()); - Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue()); - Attrs.insert(KindVal); + // FIXME: Handle string attributes. 
+ Attribute::AttrKind Kind = Attr.getKindAsEnum(); + Attrs.insert(Kind); - if (KindVal == Attribute::Alignment) + if (Kind == Attribute::Alignment) Alignment = Attr.getAlignment(); - else if (KindVal == Attribute::StackAlignment) + else if (Kind == Attribute::StackAlignment) StackAlignment = Attr.getStackAlignment(); return *this; } @@ -853,8 +902,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { assert(Idx != ~0U && "Couldn't find index in AttributeSet!"); for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) { - ConstantInt *CI = cast(I->getAttributeKind()); - Attribute::AttrKind Kind = Attribute::AttrKind(CI->getZExtValue()); + // FIXME: Support string attributes. + Attribute::AttrKind Kind = I->getKindAsEnum(); Attrs.erase(Kind); if (Kind == Attribute::Alignment) @@ -915,15 +964,10 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { assert(Idx != ~0U && "Couldn't find the index!"); for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); - I != E; ++I) { - Attribute Attr = *I; - // FIXME: Support StringRefs. - ConstantInt *Kind = cast(Attr.getAttributeKind()); - Attribute::AttrKind KindVal = Attribute::AttrKind(Kind->getZExtValue()); - - if (Attrs.count(KindVal)) + I != E; ++I) + // FIXME: Support string attributes. + if (Attrs.count(I->getKindAsEnum())) return true; - } return false; } -- cgit v1.1 From 60bdc5b16e2fc17be184b515a00c2e2a2eb40b89 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 5 Feb 2013 23:30:58 +0000 Subject: Initial support for DWARF CFI parsing and dumping in LLVM git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174463 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFContext.cpp | 30 +++++- lib/DebugInfo/DWARFContext.h | 11 +++ lib/DebugInfo/DWARFDebugFrame.cpp | 195 ++++++++++++++++++++++++++++++++++++++ lib/DebugInfo/DWARFDebugFrame.h | 46 +++++++++ 4 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 lib/DebugInfo/DWARFDebugFrame.cpp create mode 100644 lib/DebugInfo/DWARFDebugFrame.h (limited to 'lib') diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 768427f..d061f4e 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -31,6 +31,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { getCompileUnitAtIndex(i)->dump(OS); } + if (DumpType == DIDT_All || DumpType == DIDT_Frames) { + OS << "\n.debug_frame contents:\n"; + getDebugFrame()->dump(OS); + } + uint32_t offset = 0; if (DumpType == DIDT_All || DumpType == DIDT_Aranges) { OS << "\n.debug_aranges contents:\n"; @@ -152,6 +157,26 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() { return Aranges.get(); } +const DWARFDebugFrame *DWARFContext::getDebugFrame() { + if (DebugFrame) + return DebugFrame.get(); + + // There's a "bug" in the DWARFv3 standard with respect to the target address + // size within debug frame sections. While DWARF is supposed to be independent + // of its container, FDEs have fields with size being "target address size", + // which isn't specified in DWARF in general. It's only specified for CUs, but + // .eh_frame can appear without a .debug_info section. Follow the example of + // other tools (libdwarf) and extract this from the container (ObjectFile + // provides this information). 
This problem is fixed in DWARFv4 + // See this dwarf-discuss discussion for more details: + // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html + DataExtractor debugFrameData(getDebugFrameSection(), isLittleEndian(), + getAddressSize()); + DebugFrame.reset(new DWARFDebugFrame()); + DebugFrame->parse(debugFrameData); + return DebugFrame.get(); +} + const DWARFLineTable * DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { if (!Line) @@ -440,7 +465,8 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, } DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : - IsLittleEndian(Obj->isLittleEndian()) { + IsLittleEndian(Obj->isLittleEndian()), + AddressSize(Obj->getBytesInAddress()) { error_code ec; for (object::section_iterator i = Obj->begin_sections(), e = Obj->end_sections(); @@ -459,6 +485,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : LineSection = data; else if (name == "debug_aranges") ARangeSection = data; + else if (name == "debug_frame") + DebugFrameSection = data; else if (name == "debug_str") StringSection = data; else if (name == "debug_ranges") { diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index 9ff094b..f12a054 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -12,6 +12,7 @@ #include "DWARFCompileUnit.h" #include "DWARFDebugAranges.h" +#include "DWARFDebugFrame.h" #include "DWARFDebugLine.h" #include "DWARFDebugRangeList.h" #include "llvm/ADT/OwningPtr.h" @@ -29,6 +30,7 @@ class DWARFContext : public DIContext { OwningPtr Abbrev; OwningPtr Aranges; OwningPtr Line; + OwningPtr DebugFrame; SmallVector DWOCUs; OwningPtr AbbrevDWO; @@ -84,6 +86,9 @@ public: /// Get a pointer to the parsed DebugAranges object. const DWARFDebugAranges *getDebugAranges(); + /// Get a pointer to the parsed frame information object. + const DWARFDebugFrame *getDebugFrame(); + /// Get a pointer to a parsed line table corresponding to a compile unit. 
const DWARFDebugLine::LineTable * getLineTableForCompileUnit(DWARFCompileUnit *cu); @@ -96,11 +101,13 @@ public: DILineInfoSpecifier Specifier = DILineInfoSpecifier()); virtual bool isLittleEndian() const = 0; + virtual uint8_t getAddressSize() const = 0; virtual const RelocAddrMap &infoRelocMap() const = 0; virtual const RelocAddrMap &lineRelocMap() const = 0; virtual StringRef getInfoSection() = 0; virtual StringRef getAbbrevSection() = 0; virtual StringRef getARangeSection() = 0; + virtual StringRef getDebugFrameSection() = 0; virtual StringRef getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; @@ -132,11 +139,13 @@ private: class DWARFContextInMemory : public DWARFContext { virtual void anchor(); bool IsLittleEndian; + uint8_t AddressSize; RelocAddrMap InfoRelocMap; RelocAddrMap LineRelocMap; StringRef InfoSection; StringRef AbbrevSection; StringRef ARangeSection; + StringRef DebugFrameSection; StringRef LineSection; StringRef StringSection; StringRef RangeSection; @@ -153,11 +162,13 @@ class DWARFContextInMemory : public DWARFContext { public: DWARFContextInMemory(object::ObjectFile *); virtual bool isLittleEndian() const { return IsLittleEndian; } + virtual uint8_t getAddressSize() const { return AddressSize; } virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; } virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; } virtual StringRef getInfoSection() { return InfoSection; } virtual StringRef getAbbrevSection() { return AbbrevSection; } virtual StringRef getARangeSection() { return ARangeSection; } + virtual StringRef getDebugFrameSection() { return DebugFrameSection; } virtual StringRef getLineSection() { return LineSection; } virtual StringRef getStringSection() { return StringSection; } virtual StringRef getRangeSection() { return RangeSection; } diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp new file mode 100644 index 0000000..0b78cce --- /dev/null +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -0,0 +1,195 @@ +//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugFrame.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" + +using namespace llvm; +using namespace dwarf; + + +class llvm::FrameEntry { +public: + enum FrameKind {FK_CIE, FK_FDE}; + FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length) + : Kind(K), Data(D), Offset(Offset), Length(Length) + {} + + FrameKind getKind() const { return Kind; } + + virtual void dumpHeader(raw_ostream &OS) const = 0; +protected: + const FrameKind Kind; + DataExtractor Data; + uint64_t Offset; + uint64_t Length; +}; + + +class CIE : public FrameEntry { +public: + // CIEs (and FDEs) are simply container classes, so the only sensible way to + // create them is by providing the full parsed contents in the constructor. 
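// For orientation, a hedged sketch of the on-disk layout these classes
// mirror (offsets invented for illustration; cf. DWARFv3 section 6.4):
//   0x0000 CIE: length, id (0xffffffff), version, augmentation,
//               alignment factors, return-address register
//   0x001c FDE: length, cie=0x0000, initial_location, address_range
//   0x0038 FDE: length, cie=0x0000, ...
// Each FDE names the CIE it refines, which is exactly the pairing the
// CIE and FDE classes below encode.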
+ CIE(DataExtractor D, uint64_t Offset, uint64_t Length, uint8_t Version, + SmallString<8> Augmentation, uint64_t CodeAlignmentFactor, + int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister) + : FrameEntry(FK_CIE, D, Offset, Length), Version(Version), + Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor), + DataAlignmentFactor(DataAlignmentFactor), + ReturnAddressRegister(ReturnAddressRegister) + {} + + void dumpHeader(raw_ostream &OS) const { + OS << format("%08x %08x %08x CIE", Offset, Length, DW_CIE_ID) << "\n"; + OS << format(" Version: %d\n", Version); + OS << " Augmentation: \"" << Augmentation << "\"\n"; + OS << format(" Code alignment factor: %u\n", CodeAlignmentFactor); + OS << format(" Data alignment factor: %d\n", DataAlignmentFactor); + OS << format(" Return address column: %d\n", ReturnAddressRegister); + OS << "\n"; + } + + static bool classof(const FrameEntry *FE) { + return FE->getKind() == FK_CIE; + } +private: + uint8_t Version; + SmallString<8> Augmentation; + uint64_t CodeAlignmentFactor; + int64_t DataAlignmentFactor; + uint64_t ReturnAddressRegister; +}; + + +class FDE : public FrameEntry { +public: + // Each FDE has a CIE it's "linked to". Our FDE is constructed with + // an offset to the CIE (provided by parsing the FDE header). The CIE itself + // is obtained lazily once it's actually required. + FDE(DataExtractor D, uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset, + uint64_t InitialLocation, uint64_t AddressRange) + : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset), + InitialLocation(InitialLocation), AddressRange(AddressRange), + LinkedCIE(NULL) + {} + + void dumpHeader(raw_ostream &OS) const { + OS << format("%08x %08x %08x FDE ", Offset, Length, LinkedCIEOffset); + OS << format("cie=%08x pc=%08x...%08x\n", + LinkedCIEOffset, InitialLocation, + InitialLocation + AddressRange); + OS << "\n"; + } + + static bool classof(const FrameEntry *FE) { + return FE->getKind() == FK_FDE; + } +private: + uint64_t LinkedCIEOffset; + uint64_t InitialLocation; + uint64_t AddressRange; + CIE *LinkedCIE; +}; + + +DWARFDebugFrame::DWARFDebugFrame() +{ +} + + +DWARFDebugFrame::~DWARFDebugFrame() +{ + for (EntryVector::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + delete *I; + } +} + + +static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, + uint32_t Offset, int Length) { + errs() << "DUMP: "; + for (int i = 0; i < Length; ++i) { + uint8_t c = Data.getU8(&Offset); + errs().write_hex(c); errs() << " "; + } + errs() << "\n"; +} + + +void DWARFDebugFrame::parse(DataExtractor Data) { + uint32_t Offset = 0; + + while (Data.isValidOffset(Offset)) { + uint32_t StartOffset = Offset; + + bool IsDWARF64 = false; + uint64_t Length = Data.getU32(&Offset); + uint64_t Id; + + if (Length == UINT32_MAX) { + // DWARF-64 is distinguished by the first 32 bits of the initial length + // field being 0xffffffff. Then, the next 64 bits are the actual entry + // length. + IsDWARF64 = true; + Length = Data.getU64(&Offset); + } + + // At this point, Offset points to the next field after Length. + // Length is the structure size excluding itself. Compute an offset one + // past the end of the structure (needed to know how many instructions to + // read).
+ // TODO: For honest DWARF64 support, DataExtractor will have to treat + // offset_ptr as uint64_t* + uint32_t EndStructureOffset = Offset + static_cast(Length); + + // The Id field's size depends on the DWARF format + Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4); + bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID); + + if (IsCIE) { + // Note: this is specifically DWARFv3 CIE header structure. It was + // changed in DWARFv4. + uint8_t Version = Data.getU8(&Offset); + const char *Augmentation = Data.getCStr(&Offset); + uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset); + int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); + uint64_t ReturnAddressRegister = Data.getULEB128(&Offset); + + CIE *NewCIE = new CIE(Data, StartOffset, Length, Version, + StringRef(Augmentation), CodeAlignmentFactor, + DataAlignmentFactor, ReturnAddressRegister); + Entries.push_back(NewCIE); + } else { + // FDE + uint64_t CIEPointer = Id; + uint64_t InitialLocation = Data.getAddress(&Offset); + uint64_t AddressRange = Data.getAddress(&Offset); + + FDE *NewFDE = new FDE(Data, StartOffset, Length, CIEPointer, + InitialLocation, AddressRange); + Entries.push_back(NewFDE); + } + + Offset = EndStructureOffset; + } +} + + +void DWARFDebugFrame::dump(raw_ostream &OS) const { + OS << "\n"; + for (EntryVector::const_iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + (*I)->dumpHeader(OS); + } +} + diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h new file mode 100644 index 0000000..48b8d63 --- /dev/null +++ b/lib/DebugInfo/DWARFDebugFrame.h @@ -0,0 +1,46 @@ +//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGFRAME_H +#define LLVM_DEBUGINFO_DWARFDEBUGFRAME_H + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/raw_ostream.h" +#include + + +namespace llvm { + +class FrameEntry; + + +/// \brief A parsed .debug_frame section +/// +class DWARFDebugFrame { +public: + DWARFDebugFrame(); + ~DWARFDebugFrame(); + + /// \brief Dump the section data into the given stream. + void dump(raw_ostream &OS) const; + + /// \brief Parse the section from raw data. + /// data is assumed to be pointing to the beginning of the section. 
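// Usage sketch (illustrative, not part of the patch): together with the
// DWARFContext changes above, dumping frame info reduces to
//   DWARFContextInMemory Ctx(Obj);       // Obj is an object::ObjectFile*
//   Ctx.getDebugFrame()->dump(outs());   // parse() runs on first use
// since getDebugFrame() feeds the raw .debug_frame bytes into parse().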
+ void parse(DataExtractor Data); + +private: + typedef std::vector EntryVector; + EntryVector Entries; +}; + + +} // namespace llvm + +#endif + -- cgit v1.1 From 7743232775d2fe12f3152fe955218dc1fc97a497 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 5 Feb 2013 23:37:18 +0000 Subject: Add missing file to CMake list git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174465 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 1e9e509..e97455a 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMDebugInfo DWARFDebugAbbrev.cpp DWARFDebugArangeSet.cpp DWARFDebugAranges.cpp + DWARFDebugFrame.cpp DWARFDebugInfoEntry.cpp DWARFDebugLine.cpp DWARFDebugRangeList.cpp -- cgit v1.1 From 64754f499058b5dc748ea6d06a084af0ed539ec4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 5 Feb 2013 23:48:36 +0000 Subject: Add the target-dependent (string) attributes from the AttrBuilder to the AttributeSet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d61bd09..dc1a657 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -126,8 +126,13 @@ StringRef Attribute::getValueAsString() const { return pImpl ? pImpl->getValueAsString() : StringRef(); } -bool Attribute::hasAttribute(AttrKind Val) const { - return (pImpl && pImpl->hasAttribute(Val)) || (!pImpl && Val == None); +bool Attribute::hasAttribute(AttrKind Kind) const { + return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None); +} + +bool Attribute::hasAttribute(StringRef Kind) const { + if (!isStringAttribute()) return false; + return pImpl && pImpl->hasAttribute(Kind); } /// This returns the alignment field of an attribute as a byte alignment value. @@ -552,6 +557,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { if (!B.hasAttributes()) return AttributeSet(); + // Add target-independent attributes. SmallVector, 8> Attrs; for (AttrBuilder::iterator I = B.begin(), E = B.end(); I != E; ++I) { Attribute::AttrKind Kind = *I; @@ -565,6 +571,11 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind))); } + // Add target-dependent (string) attributes. 
+ for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end(); + I != E; ++I) + Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second))); + return get(C, Attrs); } -- cgit v1.1 From ba42625074aa7f4f1324a5d6666bd0e302b57f2b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 6 Feb 2013 00:20:38 +0000 Subject: Fix some formatting & add comments, following Eric's review git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174473 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 0b78cce..76fdb79 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -21,16 +21,23 @@ class llvm::FrameEntry { public: enum FrameKind {FK_CIE, FK_FDE}; FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length) - : Kind(K), Data(D), Offset(Offset), Length(Length) - {} + : Kind(K), Data(D), Offset(Offset), Length(Length) {} FrameKind getKind() const { return Kind; } virtual void dumpHeader(raw_ostream &OS) const = 0; + protected: const FrameKind Kind; + + /// \brief The data stream holding the section from which the entry was + /// parsed. DataExtractor Data; + + /// \brief Offset of this entry in the section. uint64_t Offset; + + /// \brief Entry length as specified in DWARF. uint64_t Length; }; @@ -45,8 +52,7 @@ public: : FrameEntry(FK_CIE, D, Offset, Length), Version(Version), Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor), DataAlignmentFactor(DataAlignmentFactor), - ReturnAddressRegister(ReturnAddressRegister) - {} + ReturnAddressRegister(ReturnAddressRegister) {} void dumpHeader(raw_ostream &OS) const { OS << format("%08x %08x %08x CIE", Offset, Length, DW_CIE_ID) << "\n"; @@ -61,7 +67,9 @@ public: static bool classof(const FrameEntry *FE) { return FE->getKind() == FK_CIE; } + private: + /// The following fields are defined in section 6.4.1 of the DWARF standard v3 uint8_t Version; SmallString<8> Augmentation; uint64_t CodeAlignmentFactor; @@ -75,12 +83,11 @@ public: // Each FDE has a CIE it's "linked to". Our FDE is constructed with // an offset to the CIE (provided by parsing the FDE header). The CIE itself // is obtained lazily once it's actually required.
- FDE(DataExtractor D, uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset, - uint64_t InitialLocation, uint64_t AddressRange) + FDE(DataExtractor D, uint64_t Offset, uint64_t Length, + int64_t LinkedCIEOffset, uint64_t InitialLocation, uint64_t AddressRange) : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset), InitialLocation(InitialLocation), AddressRange(AddressRange), - LinkedCIE(NULL) - {} + LinkedCIE(NULL) {} void dumpHeader(raw_ostream &OS) const { OS << format("%08x %08x %08x FDE ", Offset, Length, LinkedCIEOffset); @@ -94,6 +101,8 @@ public: return FE->getKind() == FK_FDE; } private: + + /// The following fields are defined in section 6.4.1 of the DWARF standard v3 uint64_t LinkedCIEOffset; uint64_t InitialLocation; uint64_t AddressRange; @@ -101,13 +110,11 @@ private: }; -DWARFDebugFrame::DWARFDebugFrame() -{ +DWARFDebugFrame::DWARFDebugFrame() { } -DWARFDebugFrame::~DWARFDebugFrame() -{ +DWARFDebugFrame::~DWARFDebugFrame() { for (EntryVector::iterator I = Entries.begin(), E = Entries.end(); I != E; ++I) { delete *I; -- cgit v1.1 From 9c5861fdbd5e30df55cb7cd9a0edda8d92494100 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Wed, 6 Feb 2013 00:59:41 +0000 Subject: Attempt to recover gdb bot after r174445. Failure: undefined symbol 'Lline_table_start0'. Root-cause: we use a symbol subtraction to calculate at_stmt_list, but the line table entries are not dumped in the assembly. Fix: use zero instead of a symbol subtraction for Compile Unit 0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174479 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index dcaab31..3409490 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -674,6 +674,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, LineTableStartSym); + else if (NewCU->getUniqueID() == 0) + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); else NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, LineTableStartSym, Asm->GetTempSymbol("section_line")); -- cgit v1.1 From 85df6b43403d3ebf5d80023a85699c6fb254941a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 6 Feb 2013 01:16:00 +0000 Subject: Add methods to merge an AttrBuilder into another builder. This is useful when parsing an object that references multiple attribute groups. N.B. If both builders have alignments specified, then they should match! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index dc1a657..67ab4ea 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -956,6 +956,23 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { return *this; } +AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { + // FIXME: What if both have alignments, but they don't match?! 
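// Merge semantics, sketched (illustrative, not code from this patch):
//   AttrBuilder A, B;
//   A.addAttribute(Attribute::NoInline).addAlignmentAttr(8);
//   B.addAttribute("cpu", "cortex-a8");
//   A.merge(B);  // A now holds noinline, align 8, and "cpu"="cortex-a8"
// Enum attributes are unioned, target-dependent strings from B overwrite
// same-keyed entries, and an alignment is only adopted when the
// destination has none -- hence the FIXME above.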
+ if (!Alignment) + Alignment = B.Alignment; + + if (!StackAlignment) + StackAlignment = B.StackAlignment; + + Attrs.insert(B.Attrs.begin(), B.Attrs.end()); + + for (td_const_iterator I = B.TargetDepAttrs.begin(), + E = B.TargetDepAttrs.end(); I != E; ++I) + TargetDepAttrs[I->first] = I->second; + + return *this; +} + bool AttrBuilder::contains(Attribute::AttrKind A) const { return Attrs.count(A); } -- cgit v1.1 From c342d9d345acdbd95577c7c6e9ce7d3a1bdb57bf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 6 Feb 2013 01:33:42 +0000 Subject: Add a 'StringRef' version of hasAttribute. Fix the 'operator==' and 'hasAttributes' queries to take into account target-dependent attributes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 67ab4ea..99fafae 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -977,8 +977,12 @@ bool AttrBuilder::contains(Attribute::AttrKind A) const { return Attrs.count(A); } +bool AttrBuilder::contains(StringRef A) const { + return TargetDepAttrs.find(A) != TargetDepAttrs.end(); +} + bool AttrBuilder::hasAttributes() const { - return !Attrs.empty(); + return !Attrs.empty() || !TargetDepAttrs.empty(); } bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { @@ -1005,9 +1009,17 @@ bool AttrBuilder::hasAlignmentAttr() const { } bool AttrBuilder::operator==(const AttrBuilder &B) { - SmallVector This(Attrs.begin(), Attrs.end()); - SmallVector That(B.Attrs.begin(), B.Attrs.end()); - return This == That; + for (DenseSet::iterator I = Attrs.begin(), + E = Attrs.end(); I != E; ++I) + if (!B.Attrs.count(*I)) + return false; + + for (td_const_iterator I = TargetDepAttrs.begin(), + E = TargetDepAttrs.end(); I != E; ++I) + if (B.TargetDepAttrs.find(I->first) == B.TargetDepAttrs.end()) + return false; + + return Alignment == B.Alignment && StackAlignment == B.StackAlignment; } AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { -- cgit v1.1 From 607acd66f400045919b1067432927a53484eaec1 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 6 Feb 2013 02:06:33 +0000 Subject: Tweak check to avoid integer overflow (for insanely large alignments) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174482 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9bd6ae6..e0d6643 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3520,7 +3520,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { EVT VT = TLI.getValueType(I.getType()); - if (I.getAlignment() * 8 < VT.getSizeInBits()) + if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); SDValue L = @@ -3550,7 +3550,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(I.getValueOperand()->getType()); - if (I.getAlignment() * 8 < VT.getSizeInBits()) + if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); if (TLI.getInsertFencesForAtomic()) -- cgit v1.1 From 8a0329e6ffc290fb177fd058a64b4cf81d4b620a Mon Sep 17 
00:00:00 2001 From: Eli Bendersky Date: Wed, 6 Feb 2013 03:08:02 +0000 Subject: Add virtual destructor to FrameEntry to avoid error on delete-non-virtual-dtor git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174483 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 76fdb79..ec55716 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -23,6 +23,9 @@ public: FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length) : Kind(K), Data(D), Offset(Offset), Length(Length) {} + virtual ~FrameEntry() { + } + FrameKind getKind() const { return Kind; } virtual void dumpHeader(raw_ostream &OS) const = 0; @@ -54,6 +57,9 @@ public: DataAlignmentFactor(DataAlignmentFactor), ReturnAddressRegister(ReturnAddressRegister) {} + ~CIE() { + } + void dumpHeader(raw_ostream &OS) const { OS << format("%08x %08x %08x CIE", Offset, Length, DW_CIE_ID) << "\n"; OS << format(" Version: %d\n", Version); @@ -89,6 +95,9 @@ public: InitialLocation(InitialLocation), AddressRange(AddressRange), LinkedCIE(NULL) {} + ~FDE() { + } + void dumpHeader(raw_ostream &OS) const { OS << format("%08x %08x %08x FDE ", Offset, Length, LinkedCIEOffset); OS << format("cie=%08x pc=%08x...%08x\n", -- cgit v1.1 From b2ac7c09b17efadea2a9f90f45801d9d2ee687aa Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 6 Feb 2013 05:37:46 +0000 Subject: Failing builds because a private class member is not being used after initialization is one of the reasons I consider -werror to be shoddy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174485 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index ec55716..974cecc 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -104,6 +104,9 @@ public: LinkedCIEOffset, InitialLocation, InitialLocation + AddressRange); OS << "\n"; + if (LinkedCIE) { + OS << format("%p\n", LinkedCIE); + } } static bool classof(const FrameEntry *FE) { return FE->getKind() == FK_FDE; -- cgit v1.1 From 5bc79cc4e833fea68f15780f191cbf4881679646 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 6 Feb 2013 06:00:11 +0000 Subject: ARM: Use MCTargetAsmParser::validateTargetOperandClass(). Use the validateTargetOperandClass() hook to match literal '#0' operands in InstAlias definitions. Previously this required per-instruction C++ munging of the operand list, but now is handled as a natural part of the matcher. Much better. No additional tests are required, as the pre-existing tests for these instructions exercise the new behaviour as being functionally equivalent to the old.
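For illustration (examples inferred from the special cases deleted below, not taken from the commit itself): with the hook in place, forms such as

  vceq.i32 d16, d17, #0   (vector compare-to-zero)
  rsbs     r0, r1, #0     (Thumb1 negate idiom)

match through the generated matcher directly; the trailing '#0' operand is validated against the MCK__35_0 token class by validateTargetOperandClass() instead of being rewritten by hand-written C++ for each mnemonic.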
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174488 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 66 +++++++++---------------------- 1 file changed, 19 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 106fd13..3174e9a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -269,6 +269,7 @@ public: SmallVectorImpl &Operands); bool ParseDirective(AsmToken DirectiveID); + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); unsigned checkTargetMatchPredicate(MCInst &Inst); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -5158,53 +5159,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, delete Op; } - // The vector-compare-to-zero instructions have a literal token "#0" at - // the end that comes to here as an immediate operand. Convert it to a - // token to play nicely with the matcher. - if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" || - Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 && - static_cast(Operands[5])->isImm()) { - ARMOperand *Op = static_cast(Operands[5]); - const MCConstantExpr *CE = dyn_cast(Op->getImm()); - if (CE && CE->getValue() == 0) { - Operands.erase(Operands.begin() + 5); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // VCMP{E} does the same thing, but with a different operand count. - if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 && - static_cast(Operands[4])->isImm()) { - ARMOperand *Op = static_cast(Operands[4]); - const MCConstantExpr *CE = dyn_cast(Op->getImm()); - if (CE && CE->getValue() == 0) { - Operands.erase(Operands.begin() + 4); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. Take care not to convert those - // that should hit the Thumb2 encoding. - if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && - static_cast(Operands[3])->isReg() && - static_cast(Operands[4])->isReg() && - static_cast(Operands[5])->isImm()) { - ARMOperand *Op = static_cast(Operands[5]); - const MCConstantExpr *CE = dyn_cast(Op->getImm()); - if (CE && CE->getValue() == 0 && - (isThumbOne() || - // The cc_out operand matches the IT block. - ((inITBlock() != CarrySetting) && - // Neither register operand is a high register. - (isARMLowRegister(static_cast(Operands[3])->getReg()) && - isARMLowRegister(static_cast(Operands[4])->getReg()))))){ - Operands.erase(Operands.begin() + 5); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // Adjust operands of ldrexd/strexd to MCK_GPRPair. // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, // a single GPRPair reg operand is used in the .td file to replace the two @@ -7857,3 +7811,21 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. 
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + ARMOperand *Op = static_cast(AsmOp); + // If the kind is a token for a literal immediate, check if our asm + // operand matches. This is for InstAliases which have a fixed-value + // immediate in the syntax. + if (Kind == MCK__35_0 && Op->isImm()) { + const MCConstantExpr *CE = dyn_cast(Op->getImm()); + if (!CE) + return Match_InvalidOperand; + if (CE->getValue() == 0) + return Match_Success; + } + return Match_InvalidOperand; +} -- cgit v1.1 From 60309e17a3a86756f7a1bbaca409159733586f2d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 6 Feb 2013 06:48:10 +0000 Subject: Remove unused private field to suppress a build warning. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 5f9e3e7..84f472d 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -101,7 +101,6 @@ public: /// could be a single enum, a tuple, or a string. class AttributeImpl : public FoldingSetNode { LLVMContext &Context; ///< Global context for uniquing objects - Constant *Kind; ///< Kind of attribute: enum or string AttributeEntry *Entry; ///< Holds the kind and value of the attribute -- cgit v1.1 From 97fe3d95110db54908527e547187b3007185e46c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 6 Feb 2013 06:50:38 +0000 Subject: Remove extra blank line between closing curly brace and 'else' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174492 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/PassManager.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp index 4f7984e..2bdfd98 100644 --- a/lib/IR/PassManager.cpp +++ b/lib/IR/PassManager.cpp @@ -626,8 +626,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { Pass *AnalysisPass2 = findAnalysisPass(*I2); if (AnalysisPass2) { dbgs() << "\t" << AnalysisPass2->getPassName() << "\n"; - } - else { + } else { dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n"; dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n"; dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n"; @@ -648,8 +647,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { // Recheck analysis passes to ensure that required analyses that // are already checked are still available. checkAnalysis = true; - } - else + } else // Do not schedule this analysis. Lower level analysis // passes are run on the fly. delete AnalysisPass; @@ -1797,8 +1795,7 @@ void PMStack::push(PMDataManager *PM) { TPM->addIndirectPassManager(PM); PM->setTopLevelManager(TPM); PM->setDepth(this->top()->getDepth()+1); - } - else { + } else { assert((PM->getPassManagerType() == PMT_ModulePassManager || PM->getPassManagerType() == PMT_FunctionPassManager) && "pushing bad pass manager to PMStack"); -- cgit v1.1 From 95ce4c2ffb0ff31a79b060fb112659322a5be3bf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 6 Feb 2013 06:52:58 +0000 Subject: Initial submission for the attribute group feature. Attribute groups are of the form: #0 = attributes { noinline "no-sse" "cpu"="cortex-a8" alignstack=4 } Target-dependent attributes are represented as strings. Attributes can have optional values associated with them. E.g., the "cpu" attribute has the value "cortex-a8".
Target-independent attributes are listed as enums inside the attribute classes. Multiple attribute groups can be referenced by the same object. In that case, the attributes are merged together. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174493 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 60 ++++++++++++++++++---------- lib/AsmParser/LLLexer.h | 1 + lib/AsmParser/LLParser.cpp | 99 +++++++++++++++++++++++++++++++++++++++++++++- lib/AsmParser/LLParser.h | 5 +++ lib/AsmParser/LLToken.h | 43 ++++++++++---------- 5 files changed, 167 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 72136d0..2256124 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -226,6 +226,7 @@ lltok::Kind LLLexer::LexToken() { SkipLineComment(); return LexToken(); case '!': return LexExclaim(); + case '#': return LexHash(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': @@ -394,6 +395,24 @@ lltok::Kind LLLexer::LexExclaim() { return lltok::exclaim; } +/// LexHash - Lex all tokens that start with a # character: +/// AttrGrpID ::= #[0-9]+ +lltok::Kind LLLexer::LexHash() { + // Handle AttrGrpID: #[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::AttrGrpID; + } + + return lltok::Error; +} + /// LexIdentifier: Handle several related productions: /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ @@ -531,35 +550,36 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(cc); KEYWORD(c); - KEYWORD(signext); - KEYWORD(zeroext); + KEYWORD(attributes); + + KEYWORD(address_safety); + KEYWORD(alwaysinline); + KEYWORD(byval); + KEYWORD(inlinehint); KEYWORD(inreg); - KEYWORD(sret); - KEYWORD(nounwind); - KEYWORD(noreturn); + KEYWORD(minsize); + KEYWORD(naked); + KEYWORD(nest); KEYWORD(noalias); KEYWORD(nocapture); - KEYWORD(byval); - KEYWORD(nest); + KEYWORD(noduplicate); + KEYWORD(noimplicitfloat); + KEYWORD(noinline); + KEYWORD(nonlazybind); + KEYWORD(noredzone); + KEYWORD(noreturn); + KEYWORD(nounwind); + KEYWORD(optsize); KEYWORD(readnone); KEYWORD(readonly); - KEYWORD(uwtable); KEYWORD(returns_twice); - - KEYWORD(inlinehint); - KEYWORD(noinline); - KEYWORD(alwaysinline); - KEYWORD(optsize); + KEYWORD(signext); + KEYWORD(sret); KEYWORD(ssp); KEYWORD(sspreq); KEYWORD(sspstrong); - KEYWORD(noredzone); - KEYWORD(noimplicitfloat); - KEYWORD(naked); - KEYWORD(nonlazybind); - KEYWORD(address_safety); - KEYWORD(minsize); - KEYWORD(noduplicate); + KEYWORD(uwtable); + KEYWORD(zeroext); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 1a307a8..85703c7 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -81,6 +81,7 @@ namespace llvm { lltok::Kind LexPercent(); lltok::Kind LexQuote(); lltok::Kind Lex0x(); + lltok::Kind LexHash(); uint64_t atoull(const char *Buffer, const char *End); uint64_t HexIntToVal(const char *Buffer, const char *End); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 2b6b165..22c21c6 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -174,7 +174,8 @@ bool LLParser::ParseTopLevelEntities() { case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break; case lltok::GlobalVar: if (ParseNamedGlobal()) return 
true; break; case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break; - case lltok::MetadataVar: if (ParseNamedMetadata()) return true; break; + case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break; + case lltok::AttrGrpID: if (ParseUnnamedAttrGrp()) return true; break; // The Global variable production with no name can have many different // optional leading prefixes, the production is: @@ -740,6 +741,102 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, return false; } +/// ParseUnnamedAttrGrp +/// ::= AttrGrpID '=' '{' AttrValPair+ '}' +bool LLParser::ParseUnnamedAttrGrp() { + assert(Lex.getKind() == lltok::AttrGrpID); + LocTy AttrGrpLoc = Lex.getLoc(); + unsigned VarID = Lex.getUIntVal(); + Lex.Lex(); + + if (ParseToken(lltok::equal, "expected '=' here") || + ParseToken(lltok::kw_attributes, "expected 'attributes' keyword here") || + ParseToken(lltok::lbrace, "expected '{' here") || + ParseAttributeValuePairs(ForwardRefAttrBuilder[VarID]) || + ParseToken(lltok::rbrace, "expected end of attribute group")) + return true; + + if (!ForwardRefAttrBuilder[VarID].hasAttributes()) + return Error(AttrGrpLoc, "attribute group has no attributes"); + + return false; +} + +/// ParseAttributeValuePairs +/// ::= | '=' +bool LLParser::ParseAttributeValuePairs(AttrBuilder &B) { + while (true) { + lltok::Kind Token = Lex.getKind(); + switch (Token) { + default: + return Error(Lex.getLoc(), "unterminated attribute group"); + case lltok::rbrace: + // Finished. + return false; + + // Target-dependent attributes: + case lltok::StringConstant: { + std::string Attr = Lex.getStrVal(); + Lex.Lex(); + std::string Val; + if (EatIfPresent(lltok::equal) && + ParseStringConstant(Val)) + return true; + + B.addAttribute(Attr, Val); + break; + } + + // Target-independent attributes: + case lltok::kw_align: { + unsigned Alignment; + if (ParseToken(lltok::equal, "expected '=' here") || + ParseUInt32(Alignment)) + return true; + B.addAlignmentAttr(Alignment); + break; + } + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseToken(lltok::equal, "expected '=' here") || + ParseUInt32(Alignment)) + return true; + B.addStackAlignmentAttr(Alignment); + break; + } + case lltok::kw_address_safety: B.addAttribute(Attribute::AddressSafety); break; + case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; + case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; + case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; + case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; + case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; + case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; + case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; + case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; + case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; + case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; + case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; + case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; + case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; + case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; + case 
lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; + case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; + case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; + case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; + case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; + case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; + case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; + case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; + } + + Lex.Lex(); + } +} //===----------------------------------------------------------------------===// // GlobalValue Reference/Resolution Routines. diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index d8de779..131331a 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -125,6 +125,9 @@ namespace llvm { std::map > > ForwardRefBlockAddresses; + // Attribute builder reference information. + std::map ForwardRefAttrBuilder; + public: LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), @@ -236,6 +239,8 @@ namespace llvm { bool ParseMDString(MDString *&Result); bool ParseMDNodeID(MDNode *&Result); bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); + bool ParseUnnamedAttrGrp(); + bool ParseAttributeValuePairs(AttrBuilder &B); // Type Parsing. bool ParseType(Type *&Result, bool AllowVoid = false); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index c9ecd21..8c18a3b 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -30,6 +30,7 @@ namespace lltok { lparen, rparen, // ( ) backslash, // \ (not /) exclaim, // ! + hash, // # kw_x, kw_true, kw_false, @@ -90,35 +91,36 @@ namespace lltok { kw_ptx_kernel, kw_ptx_device, kw_spir_kernel, kw_spir_func, - kw_signext, - kw_zeroext, + // Attributes: + kw_attributes, + kw_alwaysinline, + kw_address_safety, + kw_byval, + kw_inlinehint, kw_inreg, - kw_sret, - kw_nounwind, - kw_noreturn, + kw_minsize, + kw_naked, + kw_nest, kw_noalias, kw_nocapture, - kw_byval, - kw_nest, + kw_noduplicate, + kw_noimplicitfloat, + kw_noinline, + kw_nonlazybind, + kw_noredzone, + kw_noreturn, + kw_nounwind, + kw_optsize, kw_readnone, kw_readonly, - kw_uwtable, kw_returns_twice, - - kw_inlinehint, - kw_noinline, - kw_alwaysinline, - kw_optsize, + kw_signext, kw_ssp, kw_sspreq, kw_sspstrong, - kw_noredzone, - kw_noimplicitfloat, - kw_naked, - kw_nonlazybind, - kw_address_safety, - kw_minsize, - kw_noduplicate, + kw_sret, + kw_uwtable, + kw_zeroext, kw_type, kw_opaque, @@ -155,6 +157,7 @@ namespace lltok { // Unsigned Valued tokens (UIntVal). GlobalID, // @42 LocalVarID, // %42 + AttrGrpID, // #42 // String valued tokens (StrVal). LabelStr, // foo: -- cgit v1.1 From 9e3b31345f0d17b757e183a8384db92616256926 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 6 Feb 2013 09:04:56 +0000 Subject: Add icache prefetch operations to AArch64 This adds hints to the various "prfm" instructions so that they can affect the instruction cache as well as the data cache. 
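A sketch of the concrete syntax the attribute-group productions above accept (ParseUnnamedAttrGrp and ParseAttributeValuePairs): the keywords and the alignstack pair are taken from the lexer and parser tables in this patch, while the "cpu" string attribute is a hypothetical target-dependent one.

    // Hypothetical .ll fragment, embedded as a C++ string: '#0' lexes as
    // AttrGrpID via LexHash, 'attributes' as kw_attributes, and each entry
    // inside the braces is one AttrValPair.
    const char *AttrGroupExample =
        "#0 = attributes { noinline nounwind alignstack=4 \"cpu\"=\"cortex-a8\" }";

Note that the group must be non-empty; per ParseUnnamedAttrGrp, an empty brace pair is rejected with "attribute group has no attributes".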
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174495 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 6 ++++++ lib/Target/AArch64/Utils/AArch64BaseInfo.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 5f09074..ab9bba1 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -120,6 +120,12 @@ const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { {"pldl2strm", PLDL2STRM}, {"pldl3keep", PLDL3KEEP}, {"pldl3strm", PLDL3STRM}, + {"plil1keep", PLIL1KEEP}, + {"plil1strm", PLIL1STRM}, + {"plil2keep", PLIL2KEEP}, + {"plil2strm", PLIL2STRM}, + {"plil3keep", PLIL3KEEP}, + {"plil3strm", PLIL3STRM}, {"pstl1keep", PSTL1KEEP}, {"pstl1strm", PSTL1STRM}, {"pstl2keep", PSTL2KEEP}, diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 48e6c83..5eebf44 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -248,6 +248,12 @@ namespace A64PRFM { PLDL2STRM = 0x03, PLDL3KEEP = 0x04, PLDL3STRM = 0x05, + PLIL1KEEP = 0x08, + PLIL1STRM = 0x09, + PLIL2KEEP = 0x0a, + PLIL2STRM = 0x0b, + PLIL3KEEP = 0x0c, + PLIL3STRM = 0x0d, PSTL1KEEP = 0x10, PSTL1STRM = 0x11, PSTL2KEEP = 0x12, -- cgit v1.1 From cbff068398a84ed488b7fdab5fea8e05500d385a Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 6 Feb 2013 09:13:13 +0000 Subject: Add AArch64 CRC32 instructions These instructions are a late addition to the architecture, and may yet end up behind an optional attribute, but for now they're available at all times. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174496 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.td | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 03cae93..538d4bd 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -1658,7 +1658,9 @@ def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>; //===----------------------------------------------------------------------===// // Data Processing (2 sources) instructions //===----------------------------------------------------------------------===// -// Contains: UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, LSR, ASR, ROR +// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, +// LSR, ASR, ROR + class dp_2src_impl opcode, string asmop, list patterns, RegisterClass GPRsp, @@ -1672,6 +1674,19 @@ class dp_2src_impl opcode, string asmop, list patterns, patterns, itin>; +multiclass dp_2src_crc { + def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0}, + !strconcat(asmop, "b"), [], GPR32, NoItinerary>; + def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1}, + !strconcat(asmop, "h"), [], GPR32, NoItinerary>; + def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0}, + !strconcat(asmop, "w"), [], GPR32, NoItinerary>; + def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, + !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), + (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], + NoItinerary>; +} + multiclass dp_2src_zext opcode, string asmop, SDPatternOperator op> { def www : dp_2src_impl<0b0, opcode, @@ -1705,6 +1720,9 @@ multiclass dp_2src opcode, string asmop, SDPatternOperator op> 
{ } // Here we define the data processing 2 source instructions. +defm CRC32 : dp_2src_crc<0b0, "crc32">; +defm CRC32C : dp_2src_crc<0b1, "crc32c">; + defm UDIV : dp_2src<0b000010, "udiv", udiv>; defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; -- cgit v1.1 From 2e402d5b5f2fce8bfe29509cc771b9919946003b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 6 Feb 2013 16:20:31 +0000 Subject: Add some comments to new frame entries git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174515 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 974cecc..62e4856 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -17,6 +17,8 @@ using namespace llvm; using namespace dwarf; +/// \brief Abstract frame entry defining the common interface concrete +/// entries implement. class llvm::FrameEntry { public: enum FrameKind {FK_CIE, FK_FDE}; @@ -45,6 +47,7 @@ protected: }; +/// \brief DWARF Common Information Entry (CIE) class CIE : public FrameEntry { public: // CIEs (and FDEs) are simply container classes, so the only sensible way to @@ -84,6 +87,7 @@ private: }; +/// \brief DWARF Frame Description Entry (FDE) class FDE : public FrameEntry { public: // Each FDE has a CIE it's "linked to". Our FDE contains is constructed with -- cgit v1.1 From 8a06229c89f848bf742e2b88423d02558b7ca638 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 6 Feb 2013 16:43:33 +0000 Subject: Implement external weak (ELF) symbols on AArch64 Weakly defined symbols should evaluate to 0 if they're undefined at link-time. This is impossible to do with the usual address generation patterns, so we should use a literal pool entry to materlialise the address. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 15 +++++++++++++-- lib/Target/AArch64/AArch64InstrInfo.td | 6 ++++-- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e2e472f..071b432 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1863,7 +1863,7 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, // TableGen doesn't have easy access to the CodeModel or RelocationModel, so // we make that distinction here. - // We support the static, small memory model for now. + // We support the small memory model for now. assert(getTargetMachine().getCodeModel() == CodeModel::Small); EVT PtrVT = getPointerTy(); @@ -1871,6 +1871,18 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, const GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned Alignment = GV->getAlignment(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + + if (GV->isWeakForLinker() && RelocM == Reloc::Static) { + // Weak symbols can't use ADRP/ADD pair since they should evaluate to + // zero when undefined. In PIC mode the GOT can take care of this, but in + // absolute mode we use a constant pool load. 
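The same constraint is visible from the source level. A minimal C++ sketch (the symbol name is hypothetical): the address of an undefined weak symbol must compare equal to null, and a PC-relative ADRP/ADD pair cannot produce 0, so the address has to come from memory.

    // weak_thing may have no definition at link time, in which case
    // &weak_thing must evaluate to 0.
    extern int weak_thing __attribute__((weak));

    bool has_weak_thing() {
      // With the small code model and static relocations, this address is
      // now materialised with a constant-pool load rather than ADRP/ADD.
      return &weak_thing != nullptr;
    }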
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getConstantPool(GV, GN->getValueType(0)), + MachinePointerInfo::getConstantPool(), + /*isVolatile=*/ false, /*isNonTemporal=*/ true, + /*isInvariant=*/ true, 8); + } if (Alignment == 0) { const PointerType *GVPtrTy = cast(GV->getType()); @@ -1886,7 +1898,6 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, } unsigned char HiFixup, LoFixup; - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM); if (UseGOT) { diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 538d4bd..a9ff02a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2409,8 +2409,10 @@ class A64I_LDRlitSimple opc, bit v, RegisterClass OutReg, "ldr\t$Rt, $Imm19", patterns, NoItinerary>; let mayLoad = 1 in { - def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; - def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; + def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32, + [(set (i32 GPR32:$Rt), (load constpool:$Imm19))]>; + def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64, + [(set (i64 GPR64:$Rt), (load constpool:$Imm19))]>; } def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32, -- cgit v1.1 From c0b0c677a1138f0a5ce1435fc1e70cef38fd95c8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 6 Feb 2013 17:32:29 +0000 Subject: R600: Support for indirect addressing v4 Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. 
v4: - Fix 64-bit defines git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUFrameLowering.cpp | 122 ++++++++ lib/Target/R600/AMDGPUFrameLowering.h | 44 +++ lib/Target/R600/AMDGPUISelLowering.cpp | 2 + lib/Target/R600/AMDGPUISelLowering.h | 2 + lib/Target/R600/AMDGPUIndirectAddressing.cpp | 326 ++++++++++++++++++++++ lib/Target/R600/AMDGPUInstrInfo.cpp | 11 +- lib/Target/R600/AMDGPUInstrInfo.h | 60 +++- lib/Target/R600/AMDGPUInstrInfo.td | 8 + lib/Target/R600/AMDGPUInstructions.td | 39 ++- lib/Target/R600/AMDGPURegisterInfo.cpp | 23 ++ lib/Target/R600/AMDGPURegisterInfo.h | 2 + lib/Target/R600/AMDGPURegisterInfo.td | 8 + lib/Target/R600/AMDGPUTargetMachine.cpp | 6 + lib/Target/R600/AMDGPUTargetMachine.h | 2 +- lib/Target/R600/AMDILFrameLowering.cpp | 47 ---- lib/Target/R600/AMDILFrameLowering.h | 40 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 30 +- lib/Target/R600/CMakeLists.txt | 3 +- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 5 +- lib/Target/R600/R600Defines.h | 3 + lib/Target/R600/R600ISelLowering.cpp | 182 +++++++++++- lib/Target/R600/R600ISelLowering.h | 7 +- lib/Target/R600/R600InstrInfo.cpp | 121 ++++++++ lib/Target/R600/R600InstrInfo.h | 32 +++ lib/Target/R600/R600Instructions.td | 15 + lib/Target/R600/R600MachineFunctionInfo.h | 2 + lib/Target/R600/R600RegisterInfo.cpp | 14 + lib/Target/R600/R600RegisterInfo.td | 46 +++ lib/Target/R600/SIInstrInfo.cpp | 48 ++++ lib/Target/R600/SIInstrInfo.h | 26 ++ 31 files changed, 1163 insertions(+), 114 deletions(-) create mode 100644 lib/Target/R600/AMDGPUFrameLowering.cpp create mode 100644 lib/Target/R600/AMDGPUFrameLowering.h create mode 100644 lib/Target/R600/AMDGPUIndirectAddressing.cpp delete mode 100644 lib/Target/R600/AMDILFrameLowering.cpp delete mode 100644 lib/Target/R600/AMDILFrameLowering.h (limited to 'lib') diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 1aa607f..bac01a3 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -36,6 +36,7 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm); // Passes common to R600 and SI Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); +FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm); } // End namespace llvm diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp new file mode 100644 index 0000000..815d6f7 --- /dev/null +++ b/lib/Target/R600/AMDGPUFrameLowering.cpp @@ -0,0 +1,122 @@ +//===----------------------- AMDGPUFrameLowering.cpp ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +// Interface to describe a layout of a stack frame on an AMDIL target machine +// +//===----------------------------------------------------------------------===// +#include "AMDGPUFrameLowering.h" +#include "AMDGPURegisterInfo.h" +#include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; +AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, + int LAO, unsigned TransAl) + : TargetFrameLowering(D, StackAl, LAO, TransAl) { } + +AMDGPUFrameLowering::~AMDGPUFrameLowering() { } + +unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const { + + // XXX: Hardcoding to 1 for now. + // + // I think the StackWidth should be stored as metadata associated with the + // MachineFunction. This metadata can either be added by a frontend, or + // calculated by an R600-specific LLVM IR pass. + // + // The StackWidth determines how stack objects are laid out in memory. + // For a vector stack variable, like: int4 stack[2], the data will be stored + // in the following ways depending on the StackWidth. + // + // StackWidth = 1: + // + // T0.X = stack[0].x + // T1.X = stack[0].y + // T2.X = stack[0].z + // T3.X = stack[0].w + // T4.X = stack[1].x + // T5.X = stack[1].y + // T6.X = stack[1].z + // T7.X = stack[1].w + // + // StackWidth = 2: + // + // T0.X = stack[0].x + // T0.Y = stack[0].y + // T1.X = stack[0].z + // T1.Y = stack[0].w + // T2.X = stack[1].x + // T2.Y = stack[1].y + // T3.X = stack[1].z + // T3.Y = stack[1].w + // + // StackWidth = 4: + // T0.X = stack[0].x + // T0.Y = stack[0].y + // T0.Z = stack[0].z + // T0.W = stack[0].w + // T1.X = stack[1].x + // T1.Y = stack[1].y + // T1.Z = stack[1].z + // T1.W = stack[1].w + return 1; +} + +/// \returns The offset of stack object \p FI, in numbers of 32-bit registers +/// (the total register count of all objects when \p FI is -1). +int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Offset = 0; + int UpperBound = FI == -1 ?
MFI->getNumObjects() : FI; + + for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) { + const AllocaInst *Alloca = MFI->getObjectAllocation(i); + unsigned ArrayElements; + const Type *AllocaType = Alloca->getAllocatedType(); + const Type *ElementType; + + if (AllocaType->isArrayTy()) { + ArrayElements = AllocaType->getArrayNumElements(); + ElementType = AllocaType->getArrayElementType(); + } else { + ArrayElements = 1; + ElementType = AllocaType; + } + + unsigned VectorElements; + if (ElementType->isVectorTy()) { + VectorElements = ElementType->getVectorNumElements(); + } else { + VectorElements = 1; + } + + Offset += (VectorElements / getStackWidth(MF)) * ArrayElements; + } + return Offset; +} + +const TargetFrameLowering::SpillSlot * +AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = 0; + return 0; +} +void +AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const { +} +void +AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} + +bool +AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const { + return false; +} diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h new file mode 100644 index 0000000..cf5742e --- /dev/null +++ b/lib/Target/R600/AMDGPUFrameLowering.h @@ -0,0 +1,44 @@ +//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface to describe a layout of a stack frame on a AMDIL target +/// machine. +// +//===----------------------------------------------------------------------===// +#ifndef AMDILFRAME_LOWERING_H +#define AMDILFRAME_LOWERING_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +/// \brief Information about the stack frame layout on the AMDGPU targets. +/// +/// It holds the direction of the stack growth, the known stack alignment on +/// entry to each function, and the offset to the locals area. +/// See TargetFrameInfo for more comments. +class AMDGPUFrameLowering : public TargetFrameLowering { +public: + AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1); + virtual ~AMDGPUFrameLowering(); + + /// \returns The number of 32-bit sub-registers that are used when storing + /// values to the stack. 
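The offset computation in getFrameIndexOffset can be checked in isolation. A standalone sketch of the same arithmetic: each object contributes (VectorElements / StackWidth) * ArrayElements registers, so the int4 stack[2] example from the comment above occupies eight registers at stack width 1.

    #include <cstdio>

    // Mirrors the per-object accumulation in
    // AMDGPUFrameLowering::getFrameIndexOffset.
    unsigned registersForObject(unsigned VectorElements, unsigned ArrayElements,
                                unsigned StackWidth) {
      return (VectorElements / StackWidth) * ArrayElements;
    }

    int main() {
      // int4 stack[2]: 4-channel vectors, 2 array elements.
      std::printf("width 1: %u regs\n", registersForObject(4, 2, 1)); // 8, T0-T7
      std::printf("width 2: %u regs\n", registersForObject(4, 2, 2)); // 4, T0-T3
      std::printf("width 4: %u regs\n", registersForObject(4, 2, 4)); // 2, T0-T1
      return 0;
    }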
+ virtual unsigned getStackWidth(const MachineFunction &MF) const; + virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const; + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + virtual bool hasFP(const MachineFunction &MF) const; +}; +} // namespace llvm +#endif // AMDILFRAME_LOWERING_H diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index f3a047a..d0d23d6 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -412,5 +412,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(URECIP) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(CONST_ADDRESS) + NODE_NAME_CASE(REGISTER_LOAD) + NODE_NAME_CASE(REGISTER_STORE) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 0584d39..927ed09 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -122,6 +122,8 @@ enum { URECIP, EXPORT, CONST_ADDRESS, + REGISTER_LOAD, + REGISTER_STORE, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp new file mode 100644 index 0000000..56aaf23 --- /dev/null +++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp @@ -0,0 +1,326 @@ +//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// Instructions can use indirect addressing to index the register file as if it +/// were memory. This pass lowers RegisterLoad and RegisterStore instructions +/// to either a COPY or a MOV that uses indirect addressing. 
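A toy model of the two core tables the pass body that follows maintains (all names here are hypothetical): the pass tracks which virtual register currently holds each "indirect address", which is what lets later loads of a statically known address become plain copies.

    #include <cstdio>
    #include <map>

    struct ToyState {
      std::map<unsigned, unsigned> RegToAddr; // like RegisterAddressMap
      std::map<unsigned, unsigned> AddrToReg; // like LiveAddressRegisterMap
    };

    // A store whose base is the INDIRECT_BASE_ADDR pseudo-register has a
    // compile-time-known address: record the vreg that now holds that slot.
    void recordDirectStore(ToyState &S, unsigned VReg, unsigned Addr) {
      S.RegToAddr[VReg] = Addr;
      S.AddrToReg[Addr] = VReg;
    }

    int main() {
      ToyState S;
      recordDirectStore(S, /*VReg=*/100, /*Addr=*/3);
      std::printf("address 3 lives in vreg %u\n", S.AddrToReg[3]); // 100
      return 0;
    }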
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace { + +class AMDGPUIndirectAddressingPass : public MachineFunctionPass { + +private: + static char ID; + const AMDGPUInstrInfo *TII; + + bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const; + +public: + AMDGPUIndirectAddressingPass(TargetMachine &tm) : + MachineFunctionPass(ID), + TII(static_cast(tm.getInstrInfo())) + { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "R600 Handle indirect addressing"; } + +}; + +} // End anonymous namespace + +char AMDGPUIndirectAddressingPass::ID = 0; + +FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) { + return new AMDGPUIndirectAddressingPass(tm); +} + +bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + int IndirectBegin = TII->getIndirectIndexBegin(MF); + int IndirectEnd = TII->getIndirectIndexEnd(MF); + + if (IndirectBegin == -1) { + // No indirect addressing, we can skip this pass + assert(IndirectEnd == -1); + return false; + } + + // The map keeps track of the indirect address that is represented by + // each virtual register. The key is the register and the value is the + // indirect address it uses. + std::map RegisterAddressMap; + + // First pass - Lower all of the RegisterStore instructions and track which + // registers are live. + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + // This map keeps track of the current live indirect registers. + // The key is the address and the value is the register + std::map LiveAddressRegisterMap; + MachineBasicBlock &MBB = *BB; + + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next) { + Next = llvm::next(I); + MachineInstr &MI = *I; + + if (!TII->isRegisterStore(MI)) { + continue; + } + + // Lower RegisterStore + + unsigned RegIndex = MI.getOperand(2).getImm(); + unsigned Channel = MI.getOperand(3).getImm(); + unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel); + const TargetRegisterClass *IndirectStoreRegClass = + TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg()); + + if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) { + // Direct register access. + unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg) + .addOperand(MI.getOperand(0)); + + RegisterAddressMap[DstReg] = Address; + LiveAddressRegisterMap[Address] = DstReg; + } else { + // Indirect register access. 
+ MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I, + MI.getOperand(0).getReg(), // Value + Address, + MI.getOperand(1).getReg()); // Offset + for (int i = IndirectBegin; i <= IndirectEnd; ++i) { + unsigned Addr = TII->calculateIndirectAddress(i, Channel); + unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass); + MOV.addReg(DstReg, RegState::Define | RegState::Implicit); + RegisterAddressMap[DstReg] = Addr; + LiveAddressRegisterMap[Addr] = DstReg; + } + } + MI.eraseFromParent(); + } + + // Update the live-ins of the successor blocks + for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(), + SuccEnd = MBB.succ_end(); + SuccEnd != Succ; ++Succ) { + std::map<unsigned, unsigned>::const_iterator Key, KeyEnd; + for (Key = LiveAddressRegisterMap.begin(), + KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) { + (*Succ)->addLiveIn(Key->second); + } + } + } + + // Second pass - Lower the RegisterLoad instructions + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + // Key is the address and the value is the register + std::map<unsigned, unsigned> LiveAddressRegisterMap; + MachineBasicBlock &MBB = *BB; + + MachineBasicBlock::livein_iterator LI = MBB.livein_begin(); + while (LI != MBB.livein_end()) { + std::vector<unsigned> PhiRegisters; + + // Make sure this live-in is used for indirect addressing + if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) { + ++LI; + continue; + } + + unsigned Address = RegisterAddressMap[*LI]; + LiveAddressRegisterMap[Address] = *LI; + PhiRegisters.push_back(*LI); + + // Check if there are other live-in registers which map to the same + // indirect address. + for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI), + LE = MBB.livein_end(); + LJ != LE; ++LJ) { + unsigned Reg = *LJ; + if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) { + continue; + } + + if (RegisterAddressMap[Reg] == Address) { + if (!regHasExplicitDef(MRI, Reg)) { + continue; + } + PhiRegisters.push_back(Reg); + } + } + + if (PhiRegisters.size() == 1) { + // We don't need to insert a Phi instruction, so we can just add the + // registers to the live list for the block. + LiveAddressRegisterMap[Address] = *LI; + MBB.removeLiveIn(*LI); + } else { + // We need to insert a PHI, because we have the same address being + // written in multiple predecessor blocks.
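This is ordinary SSA merging applied to indirect addresses instead of variables. A sketch of the grouping step, under the simplification that live-ins are plain unsigned registers:

    #include <map>
    #include <vector>

    // Collect every live-in vreg mapped to the same indirect address.
    // A group of size one stays live as-is; a larger group becomes
    // PhiDstReg = PHI [RegA, PredA], [RegB, PredB], ..., mirroring the
    // BuildMI loop that follows.
    std::vector<unsigned> phiGroup(const std::vector<unsigned> &LiveIns,
                                   const std::map<unsigned, unsigned> &RegToAddr,
                                   unsigned Addr) {
      std::vector<unsigned> Group;
      for (unsigned Reg : LiveIns) {
        std::map<unsigned, unsigned>::const_iterator It = RegToAddr.find(Reg);
        if (It != RegToAddr.end() && It->second == Addr)
          Group.push_back(Reg); // this predecessor wrote the address
      }
      return Group;
    }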
+ const TargetRegisterClass *PhiDstClass = + TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin())); + unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass); + MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(), + MBB.findDebugLoc(MBB.begin()), + TII->get(AMDGPU::PHI), PhiDstReg); + + for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(), + RE = PhiRegisters.end(); + RI != RE; ++RI) { + unsigned Reg = *RI; + MachineInstr *DefInst = MRI.getVRegDef(Reg); + assert(DefInst); + MachineBasicBlock *RegBlock = DefInst->getParent(); + Phi.addReg(Reg); + Phi.addMBB(RegBlock); + MBB.removeLiveIn(Reg); + } + RegisterAddressMap[PhiDstReg] = Address; + LiveAddressRegisterMap[Address] = PhiDstReg; + } + LI = MBB.livein_begin(); + } + + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next) { + Next = llvm::next(I); + MachineInstr &MI = *I; + + if (!TII->isRegisterLoad(MI)) { + if (MI.getOpcode() == AMDGPU::PHI) { + continue; + } + // Check for indirect register defs + for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands(); + OpIdx < NumOperands; ++OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + if (MO.isReg() && MO.isDef() && + RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) { + unsigned Reg = MO.getReg(); + unsigned LiveAddress = RegisterAddressMap[Reg]; + // Chain the live-ins + if (LiveAddressRegisterMap.find(LiveAddress) != + LiveAddressRegisterMap.end()) { + MI.addOperand(MachineOperand::CreateReg( + LiveAddressRegisterMap[LiveAddress], + false, // isDef + true, // isImp + true)); // isKill + } + LiveAddressRegisterMap[LiveAddress] = Reg; + } + } + continue; + } + + const TargetRegisterClass *SuperIndirectRegClass = + TII->getSuperIndirectRegClass(); + const TargetRegisterClass *IndirectLoadRegClass = + TII->getIndirectAddrLoadRegClass(); + unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass); + + unsigned RegIndex = MI.getOperand(2).getImm(); + unsigned Channel = MI.getOperand(3).getImm(); + unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel); + + if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) { + // Direct register access + unsigned Reg = LiveAddressRegisterMap[Address]; + unsigned AddrReg = IndirectLoadRegClass->getRegister(Address); + + if (regHasExplicitDef(MRI, Reg)) { + // If the register we are reading from has an explicit def, then that + // means it was written via a direct register access (i.e. COPY + // or other instruction that doesn't use indirect addressing). In + // this case we know where the value has been stored, so we can just + // issue a copy. + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), + MI.getOperand(0).getReg()) + .addReg(Reg); + } else { + // If the register we are reading has an implicit def, then that + // means it was written by an indirect register access (i.e. an + // instruction that uses indirect addressing). + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), + MI.getOperand(0).getReg()) + .addReg(AddrReg); + } + } else { + // Indirect register access + + // Note on REG_SEQUENCE instructions: You can't actually use the register + // it defines unless you have an instruction that takes the defined + // register class as an operand.
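A sketch of the operand shape the REG_SEQUENCE built below ends up with (the explicit-def filtering is omitted, and subRegForAddr stands in for getIndirectSubReg from the RegisterInfo hunk later in this patch): alternating (vreg, sub-register index) pairs, one per live indirect address, which pins each value to the physical register its address demands.

    #include <map>
    #include <utility>
    #include <vector>

    std::vector<std::pair<unsigned, unsigned> > sequenceOperands(
        const std::map<unsigned, unsigned> &AddrToReg,
        unsigned (*subRegForAddr)(unsigned)) {
      std::vector<std::pair<unsigned, unsigned> > Ops;
      for (std::map<unsigned, unsigned>::const_iterator I = AddrToReg.begin(),
           E = AddrToReg.end(); I != E; ++I)
        Ops.push_back(std::make_pair(I->second, subRegForAddr(I->first)));
      return Ops; // fed to the REG_SEQUENCE as reg/subreg operand pairs
    }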
+ + MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDGPU::REG_SEQUENCE), + IndirectReg); + for (int i = IndirectBegin; i <= IndirectEnd; ++i) { + unsigned Addr = TII->calculateIndirectAddress(i, Channel); + if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) { + continue; + } + unsigned Reg = LiveAddressRegisterMap[Addr]; + + // We only need to use REG_SEQUENCE for explicit defs, since the + // register coalescer won't do anything with the implicit defs. + MachineInstr *DefInstr = MRI.getVRegDef(Reg); + if (!DefInstr->getOperand(0).isReg() || + DefInstr->getOperand(0).getReg() != Reg) { + continue; + } + + // Insert a REG_SEQUENCE instruction to force the register allocator + // to allocate the virtual register to the correct physical register. + Sequence.addReg(LiveAddressRegisterMap[Addr]); + Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr)); + } + MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I, + MI.getOperand(0).getReg(), // Value + Address, + MI.getOperand(1).getReg()); // Offset + + + + Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill); + + } + MI.eraseFromParent(); + } + } + return false; +} + +bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI, + unsigned Reg) const { + MachineInstr *DefInstr = MRI.getVRegDef(Reg); + return DefInstr && DefInstr->getOperand(0).isReg() && + DefInstr->getOperand(0).getReg() == Reg; +} diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index e42a46d..640707d 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -234,7 +234,16 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { // TODO: Implement this function return true; } - + +bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE; +} + +bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD; +} + + void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const { MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index cb97af9..3909e4e 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -40,9 +40,10 @@ class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { private: const AMDGPURegisterInfo RI; - TargetMachine &TM; bool getNextBranchInstr(MachineBasicBlock::iterator &iter, MachineBasicBlock &MBB) const; +protected: + TargetMachine &TM; public: explicit AMDGPUInstrInfo(TargetMachine &tm); @@ -130,12 +131,66 @@ public: bool isAExtLoadInst(llvm::MachineInstr *MI) const; bool isStoreInst(llvm::MachineInstr *MI) const; bool isTruncStoreInst(llvm::MachineInstr *MI) const; + bool isRegisterStore(const MachineInstr &MI) const; + bool isRegisterLoad(const MachineInstr &MI) const; + +//===---------------------------------------------------------------------===// +// Pure virtual functions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===// virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const = 0; virtual unsigned getIEQOpcode() const = 0; virtual bool isMov(unsigned opcode) const = 0; + /// \returns the smallest register index that will be accessed by an indirect + /// read or write, or -1 if indirect addressing is not used by this program. + virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0; + + /// \returns the largest register index that will be accessed by an indirect + /// read or write, or -1 if indirect addressing is not used by this program. + virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0; + + /// \brief Calculate the "Indirect Address" for the given \p RegIndex and + /// \p Channel + /// + /// We model indirect addressing using a virtual address space that can be + /// accessed with loads and stores. The "Indirect Address" is the memory + /// address in this virtual address space that maps to the given \p RegIndex + /// and \p Channel. + virtual unsigned calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const = 0; + + /// \returns The register class to be used for storing values to an + /// "Indirect Address". + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( + unsigned SourceReg) const = 0; + + /// \returns The register class to be used for loading values from + /// an "Indirect Address". + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0; + + /// \brief Build instruction(s) for an indirect register write. + /// + /// \returns The instruction that performs the indirect register write + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const = 0; + + /// \brief Build instruction(s) for an indirect register read. + /// + /// \returns The instruction that performs the indirect register read + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const = 0; + + /// \returns the register class whose sub-registers are the set of all + /// possible registers that can be used for indirect addressing.
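One plausible realization of the calculateIndirectAddress contract, matching the GET_REG_CHAN / GET_REG_INDEX macros added to R600Defines.h later in this patch (shift 9, mask 0x1ff); whether the R600 implementation uses exactly this packing is an assumption here.

    #include <cassert>
    #include <cstdio>

    const unsigned ChanShift = 9;   // HW_CHAN_SHIFT
    const unsigned RegMask = 0x1ff; // HW_REG_MASK

    unsigned packIndirectAddress(unsigned RegIndex, unsigned Channel) {
      assert(RegIndex <= RegMask && "register index does not fit in 9 bits");
      return (Channel << ChanShift) | RegIndex;
    }

    int main() {
      unsigned Addr = packIndirectAddress(/*RegIndex=*/5, /*Channel=*/2);
      std::printf("chan=%u index=%u\n", Addr >> ChanShift, Addr & RegMask);
      return 0;
    }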
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0; + + /// \brief Convert the AMDIL MachineInstr to a supported ISA /// MachineInstr virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, @@ -145,4 +200,7 @@ public: } // End llvm namespace +#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63) +#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62) + #endif // AMDGPUINSTRINFO_H diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 96368e8..b66ae87 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>; + +def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", + SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayLoad]>; + +def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", + SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayStore]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index e634d20..3dee004 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// class AMDGPUInst pattern> : Instruction { - field bits<16> AMDILOp = 0; - field bits<3> Gen = 0; + field bit isRegisterLoad = 0; + field bit isRegisterStore = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -22,8 +22,9 @@ class AMDGPUInst pattern> : Instructio let AsmString = asm; let Pattern = pattern; let Itinerary = NullALU; - let TSFlags{42-40} = Gen; - let TSFlags{63-48} = AMDILOp; + + let TSFlags{63} = isRegisterLoad; + let TSFlags{62} = isRegisterStore; } class AMDGPUShaderInst pattern> @@ -101,7 +102,9 @@ def FP_ONE : PatLeaf < [{return N->isExactlyValue(1.0);}] >; -let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { +let isCodeGenOnly = 1, isPseudo = 1 in { + +let usesCustomInserter = 1 in { class CLAMP : AMDGPUShaderInst < (outs rc:$dst), @@ -131,7 +134,31 @@ def SHADER_TYPE : AMDGPUShaderInst < [(int_AMDGPU_shader_type imm:$type)] >; -} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1 +} // usesCustomInserter = 1 + +multiclass RegisterLoadStore { + def RegisterLoad : AMDGPUShaderInst < + (outs dstClass:$dst), + (ins addrClass:$addr, i32imm:$chan), + "RegisterLoad $dst, $addr", + [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr, + (i32 timm:$chan)))] + > { + let isRegisterLoad = 1; + } + + def RegisterStore : AMDGPUShaderInst < + (outs), + (ins dstClass:$val, addrClass:$addr, i32imm:$chan), + "RegisterStore $val, $addr", + [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))] + > { + let isRegisterStore = 1; + } +} + +} // End isCodeGenOnly = 1, isPseudo = 1 /* Generic helper patterns for intrinsics */ /* -------------------------------------- */ diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index b332905..7878d60 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -48,5 +48,28 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return 0; } +unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const { + + switch(IndirectIndex) { + case 0: return AMDGPU::indirect_0; + case 1: return 
AMDGPU::indirect_1; + case 2: return AMDGPU::indirect_2; + case 3: return AMDGPU::indirect_3; + case 4: return AMDGPU::indirect_4; + case 5: return AMDGPU::indirect_5; + case 6: return AMDGPU::indirect_6; + case 7: return AMDGPU::indirect_7; + case 8: return AMDGPU::indirect_8; + case 9: return AMDGPU::indirect_9; + case 10: return AMDGPU::indirect_10; + case 11: return AMDGPU::indirect_11; + case 12: return AMDGPU::indirect_12; + case 13: return AMDGPU::indirect_13; + case 14: return AMDGPU::indirect_14; + case 15: return AMDGPU::indirect_15; + default: llvm_unreachable("indirect index out of range"); + } +} + #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 4a4bead..1fc88e7 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -57,6 +57,8 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { RegScavenger *RS) const; unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getIndirectSubReg(unsigned IndirectIndex) const; + }; } // End namespace llvm diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td index 8181e02..0b4482c 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.td +++ b/lib/Target/R600/AMDGPURegisterInfo.td @@ -16,6 +16,14 @@ let Namespace = "AMDGPU" in { def sel_y : SubRegIndex; def sel_z : SubRegIndex; def sel_w : SubRegIndex; + + +foreach Index = 0-15 in { + def indirect_#Index : SubRegIndex; +} + +def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; + } include "R600RegisterInfo.td" diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 2185be3..821e864 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() { bool AMDGPUPassConfig::addInstSelector() { addPass(createAMDGPUPeepholeOpt(*TM)); addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); + + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + // The callbacks this pass uses are not implemented yet on SI. + addPass(createAMDGPUIndirectAddressingPass(*TM)); + } return false; } diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h index 91f9a83..2afe787 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.h +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -15,9 +15,9 @@ #ifndef AMDGPU_TARGET_MACHINE_H #define AMDGPU_TARGET_MACHINE_H +#include "AMDGPUFrameLowering.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" -#include "AMDILFrameLowering.h" #include "AMDILIntrinsicInfo.h" #include "R600ISelLowering.h" #include "llvm/ADT/OwningPtr.h" diff --git a/lib/Target/R600/AMDILFrameLowering.cpp b/lib/Target/R600/AMDILFrameLowering.cpp deleted file mode 100644 index 9ad495a..0000000 --- a/lib/Target/R600/AMDILFrameLowering.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -/// \file -/// \brief Interface to describe a layout of a stack frame on a AMDGPU target -/// machine.
-// -//===----------------------------------------------------------------------===// -#include "AMDILFrameLowering.h" -#include "llvm/CodeGen/MachineFrameInfo.h" - -using namespace llvm; -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, - int LAO, unsigned TransAl) - : TargetFrameLowering(D, StackAl, LAO, TransAl) { -} - -AMDGPUFrameLowering::~AMDGPUFrameLowering() { -} - -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI); -} - -const TargetFrameLowering::SpillSlot * -AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { - NumEntries = 0; - return 0; -} -void -AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const { -} -void -AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { -} -bool -AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const { - return false; -} diff --git a/lib/Target/R600/AMDILFrameLowering.h b/lib/Target/R600/AMDILFrameLowering.h deleted file mode 100644 index 51337c3..0000000 --- a/lib/Target/R600/AMDILFrameLowering.h +++ /dev/null @@ -1,40 +0,0 @@ -//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief Interface to describe a layout of a stack frame on a AMDIL target -/// machine. -// -//===----------------------------------------------------------------------===// -#ifndef AMDILFRAME_LOWERING_H -#define AMDILFRAME_LOWERING_H - -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - -/// \brief Information about the stack frame layout on the AMDGPU targets. -/// -/// It holds the direction of the stack growth, the known stack alignment on -/// entry to each function, and the offset to the locals area. -/// See TargetFrameInfo for more comments. -class AMDGPUFrameLowering : public TargetFrameLowering { -public: - AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1); - virtual ~AMDGPUFrameLowering(); - virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const; - virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - virtual bool hasFP(const MachineFunction &MF) const; -}; -} // namespace llvm -#endif // AMDILFRAME_LOWERING_H diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 84223f6..2699409 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -75,6 +75,7 @@ private: bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); // Include the pieces autogenerated from the target description. 
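SelectADDRIndirect, declared above and defined at the end of this hunk, splits an address into (Base, Offset) in three ways. A standalone model of the three cases, with plain values standing in for SDValues:

    #include <cstdio>

    struct Split {
      bool BaseIsIndirectBaseAddr; // Base = INDIRECT_BASE_ADDR pseudo-reg
      long Offset;
    };

    // Case 1: the whole address is a constant.
    // Case 2: (ADD|OR base, constant) splits into base plus immediate.
    // Case 3: anything else keeps the address as base with offset 0.
    Split splitAddress(bool IsConstant, long ConstVal,
                       bool IsAddOrWithConstRhs, long RhsVal) {
      if (IsConstant)
        return Split{true, ConstVal};
      if (IsAddOrWithConstRhs)
        return Split{false, RhsVal};
      return Split{false, 0};
    }

    int main() {
      Split S = splitAddress(true, 12, false, 0);
      std::printf("indirect base: %d, offset: %ld\n",
                  S.BaseIsIndirectBaseAddr, S.Offset);
      return 0;
    }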
#include "AMDGPUGenDAGISel.inc" @@ -161,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; - case ISD::FrameIndex: { - if (FrameIndexSDNode *FIN = dyn_cast(N)) { - unsigned int FI = FIN->getIndex(); - EVT OpVT = N->getValueType(0); - unsigned int NewOpc = AMDGPU::COPY; - SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); - } - break; - } case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget(); @@ -613,3 +604,22 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, return true; } + +bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *C; + + if ((C = dyn_cast(Addr))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && + (C = dyn_cast(Addr.getOperand(1)))) { + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + } else { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + } + + return true; +} diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index a8be7ed..4f74b04 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen AMDILDevice.cpp AMDILDeviceInfo.cpp AMDILEvergreenDevice.cpp - AMDILFrameLowering.cpp AMDILIntrinsicInfo.cpp AMDILISelDAGToDAG.cpp AMDILISelLowering.cpp @@ -25,6 +24,8 @@ add_llvm_target(R600CodeGen AMDILPeepholeOptimizer.cpp AMDILSIDevice.cpp AMDGPUAsmPrinter.cpp + AMDGPUFrameLowering.cpp + AMDGPUIndirectAddressing.cpp AMDGPUMCInstLower.cpp AMDGPUSubtarget.cpp AMDGPUStructurizeCFG.cpp diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index e76c6c8..fb17ab7 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -105,10 +105,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.getImm() != 0) { - O << " + " << Op.getImm(); - } + printIfSet(MI, OpNo, O, "+"); } void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index e19eea3..16cfcf5 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -49,6 +49,9 @@ namespace R600_InstFlag { #define HW_REG_MASK 0x1ff #define HW_CHAN_SHIFT 9 +#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT) +#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK) + namespace R600Operands { enum Ops { DST, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 110dcc1..85187f8 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -16,6 +16,7 @@ #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -71,11 +72,23 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); 
setOperationAction(ISD::SELECT, MVT::f32, Custom); + // Legalize loads and stores to the private address space. + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); + setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -350,6 +363,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); + case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = @@ -485,6 +499,10 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); return; } + case ISD::STORE: + SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode(); + Results.push_back(SDValue(Node, 0)); + return; } } @@ -552,6 +570,20 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, false, false, false, 0); } +SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { + + MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUFrameLowering *TFL = + static_cast(getTargetMachine().getFrameLowering()); + + FrameIndexSDNode *FIN = dyn_cast(Op); + assert(FIN); + + unsigned FrameIndex = FIN->getIndex(); + unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); + return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32); +} + SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -766,6 +798,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return Cond; } +/// LLVM generates byte-addresed pointers. For indirect addressing, we need to +/// convert these pointers to a register index. Each register holds +/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the +/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used +/// for indirect addressing. 
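The shift amounts chosen in stackPtrToRegIndex directly below follow from that comment: with StackWidth 32-bit channels used per register, one register covers StackWidth * 4 bytes, so a byte pointer becomes a register index by shifting right by 2, 3, or 4. A standalone sketch of the same mapping:

    // StackWidth 1 -> 4 bytes per register (>> 2), 2 -> 8 bytes (>> 3),
    // 4 -> 16 bytes (>> 4). The real code reaches llvm_unreachable for any
    // other width.
    unsigned regIndexForBytePtr(unsigned BytePtr, unsigned StackWidth) {
      switch (StackWidth) {
      case 1: return BytePtr >> 2;
      case 2: return BytePtr >> 3;
      case 4: return BytePtr >> 4;
      default: return ~0u; // invalid stack width
      }
    }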
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+                                               unsigned StackWidth,
+                                               SelectionDAG &DAG) const {
+  unsigned SRLPad;
+  switch(StackWidth) {
+  case 1:
+    SRLPad = 2;
+    break;
+  case 2:
+    SRLPad = 3;
+    break;
+  case 4:
+    SRLPad = 4;
+    break;
+  default: llvm_unreachable("Invalid stack width");
+  }
+
+  return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+                     DAG.getConstant(SRLPad, MVT::i32));
+}
+
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+                                         unsigned ElemIdx,
+                                         unsigned &Channel,
+                                         unsigned &PtrIncr) const {
+  switch (StackWidth) {
+  default:
+  case 1:
+    Channel = 0;
+    if (ElemIdx > 0) {
+      PtrIncr = 1;
+    } else {
+      PtrIncr = 0;
+    }
+    break;
+  case 2:
+    Channel = ElemIdx % 2;
+    if (ElemIdx == 2) {
+      PtrIncr = 1;
+    } else {
+      PtrIncr = 0;
+    }
+    break;
+  case 4:
+    Channel = ElemIdx;
+    PtrIncr = 0;
+    break;
+  }
+}
+
 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc DL = Op.getDebugLoc();
   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
@@ -787,7 +874,52 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     }
     return Chain;
   }
-  return SDValue();
+
+  EVT ValueVT = Value.getValueType();
+
+  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+    return SDValue();
+  }
+
+  // Lowering for indirect addressing
+
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+                                         getTargetMachine().getFrameLowering());
+  unsigned StackWidth = TFL->getStackWidth(MF);
+
+  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+  if (ValueVT.isVector()) {
+    unsigned NumElemVT = ValueVT.getVectorNumElements();
+    EVT ElemVT = ValueVT.getVectorElementType();
+    SDValue Stores[4];
+
+    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+                                      "vector width in store");
+
+    for (unsigned i = 0; i < NumElemVT; ++i) {
+      unsigned Channel, PtrIncr;
+      getStackAddress(StackWidth, i, Channel, PtrIncr);
+      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+                        DAG.getConstant(PtrIncr, MVT::i32));
+      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+                                 Value, DAG.getConstant(i, MVT::i32));
+
+      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+                              Chain, Elem, Ptr,
+                              DAG.getTargetConstant(Channel, MVT::i32));
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+  } else {
+    if (ValueVT == MVT::i8) {
+      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+    }
+    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+                        DAG.getTargetConstant(0, MVT::i32)); // Channel
+  }
+
+  return Chain;
 }

 // return (512 + (kc_bank << 12)
@@ -876,7 +1008,53 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const

     return DAG.getMergeValues(MergedValues, 2, DL);
   }
-  return SDValue();
+  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+    return SDValue();
+  }
+
+  // Lowering for indirect addressing
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+                                         getTargetMachine().getFrameLowering());
+  unsigned StackWidth = TFL->getStackWidth(MF);
+
+  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+  if (VT.isVector()) {
+    unsigned NumElemVT = VT.getVectorNumElements();
+    EVT ElemVT = VT.getVectorElementType();
+    SDValue Loads[4];
+
+    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+                                      "vector width in load");
+
+    for (unsigned i = 0; i < NumElemVT; ++i) {
+      unsigned Channel, PtrIncr;
+      getStackAddress(StackWidth, i, Channel, PtrIncr);
+      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+                        DAG.getConstant(PtrIncr, MVT::i32));
+      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+                             Chain, Ptr,
+                             DAG.getTargetConstant(Channel, MVT::i32),
+                             Op.getOperand(2));
+    }
+    for (unsigned i = NumElemVT; i < 4; ++i) {
+      Loads[i] = DAG.getUNDEF(ElemVT);
+    }
+    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+  } else {
+    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+                              Chain, Ptr,
+                              DAG.getTargetConstant(0, MVT::i32), // Channel
+                              Op.getOperand(2));
+  }
+
+  SDValue Ops[2];
+  Ops[0] = LoweredLoad;
+  Ops[1] = Chain;
+
+  return DAG.getMergeValues(Ops, 2, DL);
 }

 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index c141d50..afa3897 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -64,7 +64,12 @@ private:
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-
+  SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+                             SelectionDAG &DAG) const;
+  void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+                       unsigned &Channel, unsigned &PtrIncr) const;
   bool isZero(SDValue Op) const;
 };

diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 85859eb..7e3f005 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -16,8 +16,11 @@
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"

 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenDFAPacketizer.inc"
@@ -465,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   return 2;
 }

+int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  int Offset = 0;
+
+  if (MFI->getNumObjects() == 0) {
+    return -1;
+  }
+
+  if (MRI.livein_empty()) {
+    return 0;
+  }
+
+  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+                                            LE = MRI.livein_end();
+                                            LI != LE; ++LI) {
+    Offset = std::max(Offset,
+                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
+  }
+
+  return Offset + 1;
+}
+
+int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+  int Offset = 0;
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Variable sized objects are not supported
+  assert(!MFI->hasVarSizedObjects());
+
+  if (MFI->getNumObjects() == 0) {
+    return -1;
+  }
+
+  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+  return getIndirectIndexBegin(MF) + Offset;
+}
+
+std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+                                             const MachineFunction &MF) const {
+  const AMDGPUFrameLowering *TFL =
+                static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+  std::vector<unsigned> Regs;
+
+  unsigned StackWidth = TFL->getStackWidth(MF);
+  int End = getIndirectIndexEnd(MF);
+
+  if (End == -1) {
+    return Regs;
+  }
+
+  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+    Regs.push_back(SuperReg);
+    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+      Regs.push_back(Reg);
+    }
+  }
+  return Regs;
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+                                                 unsigned Channel) const {
+  // XXX: Remove when we support a stack width > 2
+  assert(Channel == 0);
+  return RegIndex;
+}
+
+const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
+                                                  unsigned SourceReg) const {
+  return &AMDGPU::R600_TReg32RegClass;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
+  return &AMDGPU::TRegMemRegClass;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned ValueReg, unsigned Address,
+                                       unsigned OffsetReg) const {
+  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+                                               AMDGPU::AR_X, OffsetReg);
+  setImmOperand(MOVA, R600Operands::WRITE, 0);
+
+  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+                                      AddrReg, ValueReg)
+                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+  setImmOperand(Mov, R600Operands::DST_REL, 1);
+  return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned ValueReg, unsigned Address,
+                                       unsigned OffsetReg) const {
+  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+                                               AMDGPU::AR_X,
+                                               OffsetReg);
+  setImmOperand(MOVA, R600Operands::WRITE, 0);
+  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+                                      ValueReg,
+                                      AddrReg)
+                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+  setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+
+  return Mov;
+}
+
+const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
+  return &AMDGPU::IndirectRegRegClass;
+}
+
+
 MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                   MachineBasicBlock::iterator I,
                                                   unsigned Opcode,
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index 11685af..efe721c 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -113,6 +113,38 @@ namespace llvm {
   virtual int getInstrLatency(const InstrItineraryData *ItinData,
                               SDNode *Node) const { return 1;}

+  /// \returns a list of all the registers that may be accessed using indirect
+  /// addressing.
+  std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
+
+  virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+  virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+
+  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+                                            unsigned Channel) const;
+
+  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+                                                  unsigned SourceReg) const;
+
+  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned ValueReg, unsigned Address,
+                                  unsigned OffsetReg) const;
+
+  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned ValueReg, unsigned Address,
+                                  unsigned OffsetReg) const;
+
+  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+
+
+  ///buildDefaultInstruction - This function returns a MachineInstr with
+  /// all the instruction modifiers initialized to their default values.
   /// You can use this function to avoid manually specifying each instruction
   /// modifier operand when building a new instruction.
   ///
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index f935313..afb30ec 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -91,11 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
 // default to 0.
 def LAST : InstFlag<"printLast", 1>;

+def FRAMEri : Operand<iPTR> {
+  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
 def ADDRParam : ComplexPattern;
 def ADDRDWord : ComplexPattern;
 def ADDRVTX_READ : ComplexPattern;
 def ADDRGA_CONST_OFFSET : ComplexPattern;
 def ADDRGA_VAR_OFFSET : ComplexPattern;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [frameindex]>;

 class R600ALU_Word0 {
   field bits<32> Word0;
@@ -1220,6 +1225,10 @@ let Predicates = [isEGorCayman] in {
   defm DOT4_eg : DOT4_Common<0xBE>;
   defm CUBE_eg : CUBE_Common<0xC0>;

+let hasSideEffects = 1 in {
+  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
   def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common;

   def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
@@ -1470,6 +1479,12 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
 }

+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
 let Predicates = [isCayman] in {

 let isVector = 1 in {
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index ad7b4da..4b901f4 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -13,6 +13,7 @@
 #ifndef R600MACHINEFUNCTIONINFO_H
 #define R600MACHINEFUNCTIONINFO_H

+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include <vector>
@@ -24,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
 public:
   R600MachineFunctionInfo(const MachineFunction &MF);
   SmallVector LiveOuts;
+  std::vector IndirectRegs;
   SDNode *Outputs[16];
 };

diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index d46b3a3..cd3fc4a 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -15,6 +15,7 @@
 #include "R600RegisterInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600Defines.h"
+#include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"

 using namespace llvm;
@@ -43,6 +44,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::PRED_SEL_ZERO);
   Reserved.set(AMDGPU::PRED_SEL_ONE);

+  for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+                        E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+    Reserved.set(*I);
+  }
+
+  const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
+  std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
+  for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
+                                       E = IndirectRegs.end();
+                                       I != E; ++I) {
+    Reserved.set(*I);
+  }
   return Reserved;
 }

@@ -77,3 +90,4 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
     case 3: return AMDGPU::sel_w;
   }
 }
+
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 993fefc..9a8b859 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -27,6 +27,12 @@ foreach Index = 0-127 in {
   foreach Chan = [ "X", "Y", "Z", "W" ] in {
     // 32-bit Temporary Registers
     def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+    // Indirect addressing offset registers
+    def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+                                             Index, Chan>;
+    def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
+                                                Chan>;
   }
   // 128-bit Temporary Registers
   def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@@ -57,6 +63,7 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
 def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
 def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;

 def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
                           (add (sequence "ArrayBase%u", 448, 464))>;
@@ -66,6 +73,13 @@ def ALU_CONST : R600Reg<"CBuf", 0>;
 // interpolation param reference, SRCx_SEL contains index
 def ALU_PARAM : R600Reg<"Param", 0>;

+let isAllocatable = 0 in {
+
+// XXX: Only use the X channel, until we support wider stack widths
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+
+} // End isAllocatable = 0
+
 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                    (add (sequence "T%u_X", 0, 127))>;

@@ -85,6 +99,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,

 def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
     R600_TReg32,
     R600_ArrayBase,
+    R600_Addr,
     ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
     ALU_CONST, ALU_PARAM
     )>;
@@ -99,3 +114,34 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
                                 (add (sequence "T%u_XYZW", 0, 127))> {
   let CopyCost = -1;
 }
+
+//===----------------------------------------------------------------------===//
+// Register classes for indirect addressing
+//===----------------------------------------------------------------------===//
+
+// Super register for all the Indirect Registers. This register class is used
+// by the REG_SEQUENCE instruction to specify the registers to use for direct
+// reads / writes which may be written / read by an indirect address.
+class IndirectSuper subregs> : + RegisterWithSubRegs { + let Namespace = "AMDGPU"; + let SubRegIndices = + [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6, + indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12, + indirect_13,indirect_14,indirect_15]; +} + +def IndirectSuperReg : IndirectSuper<"Indirect", + [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X, + TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X, + TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X] +>; + +def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>; + +// This register class defines the registers that are the storage units for +// the "Indirect Addressing" pseudo memory space. +// XXX: Only use the X channel, until we support wider stack widths +def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32, + (add (sequence "TRegMem%u_X", 0, 16)) +>; diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index c6ad4d5..b40337d 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -87,3 +87,51 @@ bool SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { return RC != &AMDGPU::EXECRegRegClass; } + +//===----------------------------------------------------------------------===// +// Indirect addressing callbacks +//===----------------------------------------------------------------------===// + +unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const { + assert(Channel == 0); + return RegIndex; +} + + +int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { + llvm_unreachable("Unimplemented"); +} + +int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { + llvm_unreachable("Unimplemented"); +} + +const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass( + unsigned SourceReg) const { + llvm_unreachable("Unimplemented"); +} + +const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const { + llvm_unreachable("Unimplemented"); +} + +MachineInstrBuilder SIInstrInfo::buildIndirectWrite( + MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, unsigned OffsetReg) const { + llvm_unreachable("Unimplemented"); +} + +MachineInstrBuilder SIInstrInfo::buildIndirectRead( + MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, unsigned OffsetReg) const { + llvm_unreachable("Unimplemented"); +} + +const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const { + llvm_unreachable("Unimplemented"); +} diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 783cd9f..e4de4b8 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -48,6 +48,32 @@ public: virtual bool isMov(unsigned Opcode) const; virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + + virtual int getIndirectIndexBegin(const MachineFunction &MF) const; + + virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + + virtual unsigned calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const; + + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( + unsigned SourceReg) const; + + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const; + + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned 
Address, + unsigned OffsetReg) const; + + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, + unsigned OffsetReg) const; + + virtual const TargetRegisterClass *getSuperIndirectRegClass() const; }; } // End namespace llvm -- cgit v1.1 From 212af6af024ca358ed109c7b1c3a6b1fd30fd71b Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 6 Feb 2013 17:33:58 +0000 Subject: PPC calling convention cleanup. Most of PPCCallingConv.td is used only by the 32-bit SVR4 ABI. Rename things to clarify this. Also delete some code that's been commented out for a long time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174526 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCallingConv.td | 57 ++++++++---------------------- lib/Target/PowerPC/PPCISelLowering.cpp | 64 +++++++++++++++++----------------- 2 files changed, 46 insertions(+), 75 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 1633580..caeb179 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -38,49 +38,20 @@ def RetCC_PPC : CallingConv<[ //===----------------------------------------------------------------------===// -// PowerPC Argument Calling Conventions -//===----------------------------------------------------------------------===// -/* -def CC_PPC : CallingConv<[ - // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, - CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, - - // Common sub-targets passes FP values in F1 - F13 - CCIfType<[f32, f64], - CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, - - // The first 12 Vector arguments are passed in altivec registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>> - -/* - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToStack<16, 16>>*/ -]>; - -*/ - -//===----------------------------------------------------------------------===// -// PowerPC System V Release 4 ABI +// PowerPC System V Release 4 32-bit ABI //===----------------------------------------------------------------------===// -def CC_PPC_SVR4_Common : CallingConv<[ +def CC_PPC32_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. - CCIfType<[i32], CCIfSplit>>, + CCIfType<[i32], CCIfSplit>>, // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. - CCIfType<[f64], CCIfSplit>>, + CCIfType<[f64], CCIfSplit>>, // FP values are passed in F1 - F8. CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, @@ -101,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[ // This calling convention puts vector arguments always on the stack. 
It is used // to assign vector arguments which belong to the variable portion of the // parameter list of a variable argument function. -def CC_PPC_SVR4_VarArg : CallingConv<[ - CCDelegateTo +def CC_PPC32_SVR4_VarArg : CallingConv<[ + CCDelegateTo ]>; -// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put -// vector arguments in vector registers before putting them on the stack. -def CC_PPC_SVR4 : CallingConv<[ +// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to +// put vector arguments in vector registers before putting them on the stack. +def CC_PPC32_SVR4 : CallingConv<[ // The first 12 Vector arguments are passed in AltiVec registers. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, - CCDelegateTo + CCDelegateTo ]>; // Helper "calling convention" to handle aggregate by value arguments. @@ -123,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[ // Still, the address of the aggregate copy in the callers stack frame is passed // in a GPR (or in the parameter list area if all GPRs are allocated) from the // caller to the callee. The location for the address argument is assigned by -// the CC_PPC_SVR4 calling convention. +// the CC_PPC32_SVR4 calling convention. // -// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are // not passed by value. -def CC_PPC_SVR4_ByVal : CallingConv<[ +def CC_PPC32_SVR4_ByVal : CallingConv<[ CCIfByVal>, - CCCustom<"CC_PPC_SVR4_Custom_Dummy"> + CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index af89bc9..e789112 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -36,20 +36,20 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -1748,18 +1748,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { return true; } -static bool 
CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1782,11 +1782,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 @@ -1909,7 +1909,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); - CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1970,7 +1970,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); @@ -3484,11 +3484,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, bool Result; if (Outs[i].IsFixed) { - Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, - CCInfo); + Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); } else { - Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo); + Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); } if (Result) { @@ -3501,7 +3501,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } } else { // All arguments are treated the same. - CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); + CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } // Assign locations to all of the outgoing aggregate by value arguments. @@ -3512,7 +3512,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are -- cgit v1.1 From 16221a60a00e52b078f6164ba4475c6e8e918e4b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 6 Feb 2013 20:43:57 +0000 Subject: This is a follow-up on r174446, now taking Atom processors into account. Atoms use LEA for updating SP in prologs/epilogs, and the exact LEA opcode depends on the data model. 
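For illustration only (a hedged sketch, not code from this patch; the opcode
names are the ones that appear in the diff below), the selection this change
converges on amounts to keying the opcode on the LP64 data model rather than
on 64-bit hardware mode alone:

    // Illustrative sketch: an ILP32 data model can still run in 64-bit
    // mode, so the stack-pointer update must ask about LP64, not "64-bit".
    static unsigned getSPUpdateOpcode(bool UseLEA, bool IsLP64) {
      if (UseLEA)                                    // Atom prefers LEA here
        return IsLP64 ? X86::LEA64r : X86::LEA32r;
      return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri; // simplified non-LEA path
    }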
Also reapplying the test case which was added and then reverted (because of Atom failures), this time specifying explicitly the CPU in addition to the triple. The test case now checks all variations (data mode, cpu Atom vs. Core). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174542 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0901961..84b1c10 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned isLP64, int64_t Imm) { } } -static unsigned getADDriOpcode(unsigned isLP64, int64_t Imm) { - if (isLP64) { +static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { if (isInt<8>(Imm)) return X86::ADD64ri8; return X86::ADD64ri32; @@ -79,8 +79,8 @@ static unsigned getADDriOpcode(unsigned isLP64, int64_t Imm) { } } -static unsigned getLEArOpcode(unsigned is64Bit) { - return is64Bit ? X86::LEA64r : X86::LEA32r; +static unsigned getLEArOpcode(unsigned IsLP64) { + return IsLP64 ? X86::LEA64r : X86::LEA32r; } /// findDeadCallerSavedReg - Return a caller-saved register that isn't live @@ -151,7 +151,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, uint64_t Offset = isSub ? -NumBytes : NumBytes; unsigned Opc; if (UseLEA) - Opc = getLEArOpcode(Is64Bit); + Opc = getLEArOpcode(IsLP64); else Opc = isSub ? getSUBriOpcode(IsLP64, Offset) @@ -1083,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (RegInfo->needsStackRealignment(MF)) MBBI = FirstCSPop; if (CSSize != 0) { - unsigned Opc = getLEArOpcode(Is64Bit); + unsigned Opc = getLEArOpcode(IsLP64); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr, false, -CSSize); } else { -- cgit v1.1 From 6c59c9f57c8428e477ed592ee3537323d287d96f Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 6 Feb 2013 21:50:15 +0000 Subject: [mips] Make NOP a pseudo instruction and expand it to "sll $zero, $zero, 0". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174546 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsAsmPrinter.cpp | 13 ++++++------- lib/Target/Mips/MipsInstrFormats.td | 6 ------ lib/Target/Mips/MipsInstrInfo.td | 3 +-- 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 07a4faa..e573e89 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -66,19 +66,18 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { - MCInst TmpInst0; - MCInstLowering.Lower(I++, TmpInst0); + // Do any auto-generated pseudo lowerings. 
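+    // Lowering pseudos inside this per-instruction walk, rather than once
+    // for the top-level MI, means pseudos bundled into delay slots (such as
+    // the NOP pseudo introduced below) get expanded as well.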
+ if (emitPseudoExpansionLowering(OutStreamer, &*I)) + continue; + MCInst TmpInst0; + MCInstLowering.Lower(I, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); - } while ((I != E) && I->isInsideBundle()); // Delay slot check + } while ((++I != E) && I->isInsideBundle()); // Delay slot check } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index c026b5d..6977e45 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -366,12 +366,6 @@ class LUI_FM { let Inst{15-0} = imm16; } -class NOP_FM { - bits<32> Inst; - - let Inst{31-0} = 0; -} - class JALR_FM { bits<5> rs; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f9e3af5..7e33cb1 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -918,8 +918,7 @@ def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>; def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>; /// No operation. -/// FIXME: NOP should be an alias of "sll $0, $0, 0". -def NOP : InstSE<(outs), (ins), "nop", [], IIAlu, FrmJ>, NOP_FM; +def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; // FrameIndexes are legalized when they are operands from load/store // instructions. The same not happens for stack address copies, so an -- cgit v1.1 From 231b83d8fd3ce86fb5cd154d50dd3f920ca0d71f Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 6 Feb 2013 21:53:53 +0000 Subject: Remove some dead code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 31 ------------------------------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 3 --- 2 files changed, 34 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3409490..85d5af5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2504,37 +2504,6 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { return NewCU; } -void DwarfDebug::emitSkeletonCU(const MCSection *Section) { - Asm->OutStreamer.SwitchSection(Section); - DIE *Die = SkeletonCU->getCUDie(); - - // Emit the compile units header. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelBeginName(), - SkeletonCU->getUniqueID())); - - // Emit size of content not including length itself - unsigned ContentSize = Die->getSize() + - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) - - Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); - Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Offset Into Abbrev. 
Section"); - - const MCSection *ASec = Asm->getObjFileLowering().getDwarfAbbrevSection(); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ASec->getLabelBeginName()), - DwarfAbbrevSectionSym); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - - emitDIE(Die, &SkeletonAbbrevs); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(Section->getLabelEndName(), - SkeletonCU->getUniqueID())); -} - void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) { assert(useSplitDwarf() && "No split dwarf debug info?"); emitAbbrevs(Section, &SkeletonAbbrevs); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 9cff128..df3dc11 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -527,9 +527,6 @@ private: /// section. CompileUnit *constructSkeletonCU(const MDNode *); - /// \brief Emit the local split debug info section. - void emitSkeletonCU(const MCSection *); - /// \brief Emit the local split abbreviations. void emitSkeletonAbbrevs(const MCSection *); -- cgit v1.1 From 01776a5a9480243f95fff50c96e3f35fad41e4cb Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 6 Feb 2013 21:53:56 +0000 Subject: Clean up multiple skeleton compile units if we have multiple compile units coming in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174548 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 ++++------ lib/CodeGen/AsmPrinter/DwarfDebug.h | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 85d5af5..3b8bebe 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -159,7 +159,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) PrevLabel(NULL), GlobalCUIndexCount(0), InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", DIEValueAllocator), - SkeletonCU(0), SkeletonAbbrevSet(InitAbbreviationsSetSize), SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", DIEValueAllocator) { @@ -1040,11 +1039,12 @@ void DwarfDebug::endModule() { E = CUMap.end(); I != E; ++I) delete I->second; - delete SkeletonCU; + for (SmallVector::iterator I = SkeletonCUs.begin(), + E = SkeletonCUs.end(); I != E; ++I) + delete *I; // Reset these for the next Module if we have one. FirstCU = NULL; - SkeletonCU = NULL; } // Find abstract variable, if any, associated with Var. @@ -2496,10 +2496,8 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - if (!SkeletonCU) - SkeletonCU = NewCU; - SkeletonHolder.addUnit(NewCU); + SkeletonCUs.push_back(NewCU); return NewCU; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index df3dc11..fb90202 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -415,8 +415,8 @@ class DwarfDebug { // original object file, rather than things that are meant // to be in the .dwo sections. - // The CU left in the original object file for separated debug info. - CompileUnit *SkeletonCU; + // The CUs left in the original object file for separated debug info. + SmallVector SkeletonCUs; // Used to uniquely define abbreviations for the skeleton emission. 
FoldingSet SkeletonAbbrevSet; -- cgit v1.1 From acb6194f93440425776cdd730a2726fd95499505 Mon Sep 17 00:00:00 2001 From: Joe Abbey Date: Wed, 6 Feb 2013 22:14:06 +0000 Subject: Code Custodian (trivial whitespace cleanup) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174550 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 56 ++++++++++++------------- lib/Bitcode/Reader/BitstreamReader.cpp | 77 +++++++++++++++++----------------- 2 files changed, 66 insertions(+), 67 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index e10e6d6..2c1e535 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -462,7 +462,7 @@ bool BitcodeReader::ParseAttributeBlock() { // Read all the records. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -473,7 +473,7 @@ bool BitcodeReader::ParseAttributeBlock() { // The interesting case. break; } - + // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { @@ -517,7 +517,7 @@ bool BitcodeReader::ParseTypeTableBody() { // Read all the records for this type table. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -734,7 +734,7 @@ bool BitcodeReader::ParseValueSymbolTable() { SmallString<128> ValueName; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -789,7 +789,7 @@ bool BitcodeReader::ParseMetadata() { // Read all the records. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -944,7 +944,7 @@ bool BitcodeReader::ParseConstants() { unsigned NextCstNo = ValueList.size(); while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -952,7 +952,7 @@ bool BitcodeReader::ParseConstants() { case BitstreamEntry::EndBlock: if (NextCstNo != ValueList.size()) return Error("Invalid constant reference!"); - + // Once all the constants have been read, go through and resolve forward // references. ValueList.ResolveConstantForwardRefs(); @@ -1337,7 +1337,7 @@ bool BitcodeReader::ParseUseLists() { // Read all the records. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -1425,14 +1425,14 @@ bool BitcodeReader::ParseModule(bool Resume) { // Read all the records for this module. while (1) { BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case BitstreamEntry::Error: Error("malformed module block"); return true; case BitstreamEntry::EndBlock: return GlobalCleanup(); - + case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. 
@@ -1473,7 +1473,7 @@ bool BitcodeReader::ParseModule(bool Resume) { return true; SeenFirstFunctionBody = true; } - + if (RememberAndSkipFunctionBody()) return true; // For streaming bitcode, suspend parsing when we reach the function @@ -1493,7 +1493,7 @@ bool BitcodeReader::ParseModule(bool Resume) { break; } continue; - + case BitstreamEntry::Record: // The interesting case. break; @@ -1714,17 +1714,17 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { while (1) { if (Stream.AtEndOfStream()) return false; - + BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); - + switch (Entry.Kind) { case BitstreamEntry::Error: Error("malformed module file"); return true; case BitstreamEntry::EndBlock: return false; - + case BitstreamEntry::SubBlock: switch (Entry.ID) { case bitc::BLOCKINFO_BLOCK_ID: @@ -1748,7 +1748,7 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { continue; case BitstreamEntry::Record: // There should be no records in the top-level of blocks. - + // The ranlib in Xcode 4 will align archive members by appending newlines // to the end of them. If this file size is a multiple of 4 but not 8, we // have to read and ignore these final 4 bytes :-( @@ -1756,7 +1756,7 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) return false; - + return Error("Invalid record at top-level"); } } @@ -1771,7 +1771,7 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { // Read all the records for this module. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -1814,25 +1814,25 @@ bool BitcodeReader::ParseTriple(std::string &Triple) { // need to understand them all. while (1) { BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case BitstreamEntry::Error: Error("malformed module file"); return true; case BitstreamEntry::EndBlock: return false; - + case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return ParseModuleTriple(Triple); - + // Ignore other sub-blocks. if (Stream.SkipBlock()) { Error("malformed block record in AST file"); return true; } continue; - + case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; @@ -1848,7 +1848,7 @@ bool BitcodeReader::ParseMetadataAttachment() { SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: @@ -1908,13 +1908,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case BitstreamEntry::Error: return Error("Bitcode error in function block"); case BitstreamEntry::EndBlock: goto OutOfRecordLoop; - + case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. @@ -1936,12 +1936,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } continue; - + case BitstreamEntry::Record: // The interesting case. break; } - + // Read a record. Record.clear(); Instruction *I = 0; @@ -2723,7 +2723,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } OutOfRecordLoop: - + // Check the function list for unresolved values. 
if (Argument *A = dyn_cast(ValueList.back())) { if (A->getParent() == 0) { diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 7984512..85076f3 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -17,19 +17,19 @@ using namespace llvm; void BitstreamCursor::operator=(const BitstreamCursor &RHS) { freeState(); - + BitStream = RHS.BitStream; NextChar = RHS.NextChar; CurWord = RHS.CurWord; BitsInCurWord = RHS.BitsInCurWord; CurCodeSize = RHS.CurCodeSize; - + // Copy abbreviations, and bump ref counts. CurAbbrevs = RHS.CurAbbrevs; for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); i != e; ++i) CurAbbrevs[i]->addRef(); - + // Copy block scope and bump ref counts. BlockScope = RHS.BlockScope; for (unsigned S = 0, e = static_cast(BlockScope.size()); @@ -47,7 +47,7 @@ void BitstreamCursor::freeState() { i != e; ++i) CurAbbrevs[i]->dropRef(); CurAbbrevs.clear(); - + // Free all the Abbrevs in the block scope. for (unsigned S = 0, e = static_cast(BlockScope.size()); S != e; ++S) { @@ -65,7 +65,7 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { // Save the current block's state on BlockScope. BlockScope.push_back(Block(CurCodeSize)); BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); - + // Add the abbrevs specific to this block to the CurAbbrevs list. if (const BitstreamReader::BlockInfo *Info = BitStream->getBlockInfo(BlockID)) { @@ -75,17 +75,17 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { CurAbbrevs.back()->addRef(); } } - + // Get the codesize of this block. CurCodeSize = ReadVBR(bitc::CodeLenWidth); SkipToFourByteBoundary(); unsigned NumWords = Read(bitc::BlockSizeWidth); if (NumWordsP) *NumWordsP = NumWords; - + // Validate that this block is sane. if (CurCodeSize == 0 || AtEndOfStream()) return true; - + return false; } @@ -99,7 +99,7 @@ void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, SmallVectorImpl &Vals) { assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); - + // Decode the value as we are commanded. switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: @@ -119,7 +119,7 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); - + // Decode the value as we are commanded. switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: @@ -152,47 +152,47 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) { } const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); - + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) continue; - + if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { skipAbbreviatedField(Op); continue; } - + if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. Read the number of elements as a vbr6. unsigned NumElts = ReadVBR(6); - + // Get the element encoding. assert(i+2 == e && "array op not second to last?"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); - + // Read all the elements. for (; NumElts; --NumElts) skipAbbreviatedField(EltEnc); continue; } - + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. 
unsigned NumElts = ReadVBR(6); SkipToFourByteBoundary(); // 32-bit alignment - + // Figure out where the end of this blob will be including tail padding. size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8; - + // If this would read off the end of the bitcode file, just set the // record to empty and return. if (!canSkipToPos(NewEnd/8)) { NextChar = BitStream->getBitcodeBytes().getExtent(); break; } - + // Skip over the blob. JumpToBit(NewEnd); } @@ -208,45 +208,45 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, Vals.push_back(ReadVBR64(6)); return Code; } - + const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); - + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { readAbbreviatedLiteral(Op, Vals); continue; } - + if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { readAbbreviatedField(Op, Vals); continue; } - + if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. Read the number of elements as a vbr6. unsigned NumElts = ReadVBR(6); - + // Get the element encoding. assert(i+2 == e && "array op not second to last?"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); - + // Read all the elements. for (; NumElts; --NumElts) readAbbreviatedField(EltEnc, Vals); continue; } - + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); // Blob case. Read the number of bytes as a vbr6. unsigned NumElts = ReadVBR(6); SkipToFourByteBoundary(); // 32-bit alignment - + // Figure out where the end of this blob will be including tail padding. size_t CurBitPos = GetCurrentBitNo(); size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8; - + // If this would read off the end of the bitcode file, just set the // record to empty and return. if (!canSkipToPos(NewEnd/8)) { @@ -254,11 +254,11 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, NextChar = BitStream->getBitcodeBytes().getExtent(); break; } - + // Otherwise, inform the streamer that we need these bytes in memory. const char *Ptr = (const char*) BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts); - + // If we can return a reference to the data, do so to avoid copying it. if (Blob) { *Blob = StringRef(Ptr, NumElts); @@ -270,7 +270,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, // Skip over tail padding. JumpToBit(NewEnd); } - + unsigned Code = (unsigned)Vals[0]; Vals.erase(Vals.begin()); return Code; @@ -286,7 +286,7 @@ void BitstreamCursor::ReadAbbrevRecord() { Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); continue; } - + BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); if (BitCodeAbbrevOp::hasEncodingData(E)) Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); @@ -300,16 +300,16 @@ bool BitstreamCursor::ReadBlockInfoBlock() { // If this is the second stream to get to the block info block, skip it. if (BitStream->hasBlockInfoRecords()) return SkipBlock(); - + if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; - + SmallVector Record; BitstreamReader::BlockInfo *CurBlockInfo = 0; - + // Read all the records for this module. while (1) { BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); - + switch (Entry.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. case llvm::BitstreamEntry::Error: @@ -325,7 +325,7 @@ bool BitstreamCursor::ReadBlockInfoBlock() { if (Entry.ID == bitc::DEFINE_ABBREV) { if (!CurBlockInfo) return true; ReadAbbrevRecord(); - + // ReadAbbrevRecord installs the abbrev in CurAbbrevs. 
Move it to the // appropriate BlockInfo. BitCodeAbbrev *Abbv = CurAbbrevs.back(); @@ -333,7 +333,7 @@ bool BitstreamCursor::ReadBlockInfoBlock() { CurBlockInfo->Abbrevs.push_back(Abbv); continue; } - + // Read a record. Record.clear(); switch (readRecord(Entry.ID, Record)) { @@ -365,4 +365,3 @@ bool BitstreamCursor::ReadBlockInfoBlock() { } } - -- cgit v1.1 From 42258e0ea8781dd29cae4b1a6eb54d8c70dcea0b Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 6 Feb 2013 22:43:31 +0000 Subject: Signficantly generalize our ability to constant fold floating point intrinsics, including ones on half types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 102 +++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 91424b2..e499c73 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -289,6 +289,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); } + if (CFP->getType()->isHalfTy()){ + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } return false; } @@ -381,7 +385,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // that address spaces don't matter here since we're not going to result in // an actual new load. Type *MapTy; - if (LoadTy->isFloatTy()) + if (LoadTy->isHalfTy()) + MapTy = Type::getInt16PtrTy(C->getContext()); + else if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); @@ -1089,6 +1095,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { + case Intrinsic::fabs: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::floor: case Intrinsic::sqrt: case Intrinsic::pow: case Intrinsic::powi: @@ -1156,11 +1169,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1172,11 +1191,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } /// ConstantFoldConvertToInt - 
Attempt to an SSE floating point to integer @@ -1228,7 +1253,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (!TLI) return 0; - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1241,8 +1266,36 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() : - Op->getValueAPF().convertToDouble(); + double V; + if (Ty->isFloatTy()) + V = Op->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + V = Op->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + V = APF.convertToDouble(); + } + + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); + case Intrinsic::log2: + return ConstantFoldFP(log2, V, Ty); + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); + case Intrinsic::floor: + return ConstantFoldFP(floor, V, Ty); + } + switch (Name[0]) { case 'a': if (Name == "acos" && TLI->has(LibFunc::acos)) @@ -1284,7 +1337,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && - (Ty->isFloatTy() || Ty->isDoubleTy())) { + (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); else // Undefined @@ -1376,18 +1429,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast(Operands[0])) { - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? - (double)Op1->getValueAPF().convertToFloat() : - Op1->getValueAPF().convertToDouble(); + double Op1V; + if (Ty->isFloatTy()) + Op1V = Op1->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op1V = Op1->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op1->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op1V = APF.convertToDouble(); + } + if (ConstantFP *Op2 = dyn_cast(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? 
- (double)Op2->getValueAPF().convertToFloat(): - Op2->getValueAPF().convertToDouble(); + double Op2V; + if (Ty->isFloatTy()) + Op2V = Op2->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op2V = Op2->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op2->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op2V = APF.convertToDouble(); + } if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); @@ -1401,6 +1471,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) return ConstantFP::get(F->getContext(), APFloat((float)std::pow((float)Op1V, -- cgit v1.1 From ec643abe6903284fc9ae7b6d9ae5e0b4ad40ad28 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 7 Feb 2013 00:21:34 +0000 Subject: Conditionalize constant folding of math intrinsics on the availability of an implementation on the host. This is a little bit unfortunate, but until someone decides to implement a full libm for APFloat, we don't have a better way to get this functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174561 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index e499c73..26e3888 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1282,16 +1282,26 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, default: break; case Intrinsic::fabs: return ConstantFoldFP(fabs, V, Ty); +#if HAVE_LOG2 case Intrinsic::log2: return ConstantFoldFP(log2, V, Ty); +#endif +#if HAVE_LOG case Intrinsic::log: return ConstantFoldFP(log, V, Ty); +#endif +#if HAVE_LOG10 case Intrinsic::log10: return ConstantFoldFP(log10, V, Ty); +#endif +#if HAVE_EXP case Intrinsic::exp: return ConstantFoldFP(exp, V, Ty); +#endif +#if HAVE_EXP2 case Intrinsic::exp2: return ConstantFoldFP(exp2, V, Ty); +#endif case Intrinsic::floor: return ConstantFoldFP(floor, V, Ty); } -- cgit v1.1 From 0fae64fde0a75cb2b36a92fb249f88d6a4378202 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Thu, 7 Feb 2013 01:40:15 +0000 Subject: Preserve fast-math flags after reassociation and commutation. Update test cases git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174571 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 25 +++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index dc7fe5c..c6115e3 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -162,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { return !Overflow; } +/// Conservatively clears subclassOptionalData after a reassociation or +/// commutation. We preserve fast-math flags when applicable as they can be +/// preserved. 
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { + FPMathOperator *FPMO = dyn_cast(&I); + if (!FPMO) { + I.clearSubclassOptionalData(); + return; + } + + FastMathFlags FMF = I.getFastMathFlags(); + I.clearSubclassOptionalData(); + I.setFastMathFlags(FMF); +} + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative: // @@ -219,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); } else { - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); } Changed = true; @@ -241,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, C); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -263,7 +278,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, B); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -283,7 +298,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -310,7 +325,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, Folded); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; continue; -- cgit v1.1 From 90e01ac0ea5bdc6dd6bccd9c59c3acb04e339666 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 7 Feb 2013 02:02:27 +0000 Subject: DWARFDebugFrame.cpp: Fix formatting on i686 hosts. FIXME: Are they really truncated to i32 from i64 unconditionally? 
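(On the FIXME: format() forwards its arguments as C varargs, so passing a 64-bit Offset or Length against a "%08x" conversion is undefined behavior; on a 32-bit host the value occupies two argument slots, the conversion consumes only one, and every later field prints garbage. A minimal sketch of the failure and the fix, assuming uint64_t fields as in these classes:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Offset = 0x12345678, Length = 0x9c;
    // Undefined: "%08x" expects unsigned int, not a 64-bit value.
    //   std::printf("%08x %08x\n", Offset, Length);
    // Well-defined: make the 32-bit truncation explicit, as the patch does.
    std::printf("%08x %08x\n", (uint32_t)Offset, (uint32_t)Length);
    // prints: 12345678 0000009c
    return 0;
  }

So the casts do truncate to 32 bits unconditionally; the only cost is the high bits of very large offsets, which an "%08x" conversion could not display anyway.)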
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174574 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 62e4856..6781da6 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -64,7 +64,9 @@ public: } void dumpHeader(raw_ostream &OS) const { - OS << format("%08x %08x %08x CIE", Offset, Length, DW_CIE_ID) << "\n"; + OS << format("%08x %08x %08x CIE", + (uint32_t)Offset, (uint32_t)Length, DW_CIE_ID) + << "\n"; OS << format(" Version: %d\n", Version); OS << " Augmentation: \"" << Augmentation << "\"\n"; OS << format(" Code alignment factor: %u\n", CodeAlignmentFactor); @@ -103,9 +105,10 @@ public: } void dumpHeader(raw_ostream &OS) const { - OS << format("%08x %08x %08x FDE ", Offset, Length, LinkedCIEOffset); + OS << format("%08x %08x %08x FDE ", + (uint32_t)Offset, (uint32_t)Length, LinkedCIEOffset); OS << format("cie=%08x pc=%08x...%08x\n", - LinkedCIEOffset, InitialLocation, + (uint32_t)LinkedCIEOffset, (uint32_t)InitialLocation, InitialLocation + AddressRange); OS << "\n"; if (LinkedCIE) { -- cgit v1.1 From 6e3443eed44a463bdbc9d2e01f01b85f07d5ca40 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Thu, 7 Feb 2013 03:49:51 +0000 Subject: Enable jumps when in -static mode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174580 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index b906f6b..f0d6a2a 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1088,7 +1088,7 @@ def: StoreM16_pat; // Unconditional branch class UncondBranch16_pat: Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> { - let Predicates = [RelocPIC, InMips16Mode]; + let Predicates = [InMips16Mode]; } def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), -- cgit v1.1 From 65c46b0cff2a7bcefff9b58895cdf8d710e3b6f7 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 7 Feb 2013 04:12:57 +0000 Subject: Moved some comments due to the recent refactoring of ObjCARC. 1. Moved a comment from ObjCARCOpts.cpp -> ObjCARCContract.cpp. 2. Removed a comment from ObjCARCOpts.cpp that was already moved to ObjCARCAliasAnalysis.h/.cpp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174581 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCContract.cpp | 3 +++ lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index ca02b00..1c13d1c 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -11,6 +11,9 @@ /// Reference Counting and is a system for managing reference counts for objects /// in Objective C. /// +/// This specific file mainly deals with ``contracting'' multiple lower level +/// operations into singular higher level operations through pattern matching. +/// /// WARNING: This file knows about certain library functions. It recognizes them /// by name, and hardwires knowledge of their semantics. 
/// diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index e6cd1a7..7f66b1c 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -13,11 +13,7 @@ /// /// The optimizations performed include elimination of redundant, partially /// redundant, and inconsequential reference count operations, elimination of -/// redundant weak pointer operations, pattern-matching and replacement of -/// low-level operations into higher-level operations, and numerous minor -/// simplifications. -/// -/// This file also defines a simple ARC-aware AliasAnalysis. +/// redundant weak pointer operations, and numerous minor simplifications. /// /// WARNING: This file knows about certain library functions. It recognizes them /// by name, and hardwires knowledge of their semantics. -- cgit v1.1 From a170f520a990a50c35f72d81b4415dc4c3ec50de Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Thu, 7 Feb 2013 04:30:39 +0000 Subject: tblgen: Diagnose duplicate includes. A double inclusion will pretty much always be an error in TableGen, so there's no point going on just to die with "def already defined" or whatnot. I'm not too thrilled about the "public: ... private: ..." to expose the DependenciesMapTy, but I really didn't see a better way to keep that type centralized. It's a smell that indicates that some refactoring is needed to make this code more loosely coupled. This should avoid all bugs of the same nature as PR15189. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174582 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/Main.cpp | 8 ++++---- lib/TableGen/TGLexer.cpp | 10 +++++++++- lib/TableGen/TGLexer.h | 13 +++++++++---- lib/TableGen/TGParser.h | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index d0ca756..e1cd623 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -64,11 +64,11 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) { return 1; } DepOut.os() << OutputFilename << ":"; - const std::vector &Dependencies = Parser.getDependencies(); - for (std::vector::const_iterator I = Dependencies.begin(), - E = Dependencies.end(); + const TGLexer::DependenciesMapTy &Dependencies = Parser.getDependencies(); + for (TGLexer::DependenciesMapTy::const_iterator I = Dependencies.begin(), + E = Dependencies.end(); I != E; ++I) { - DepOut.os() << " " << (*I); + DepOut.os() << " " << I->first; } DepOut.os() << "\n"; DepOut.keep(); diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index e75abcf..c6be4f8 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -309,7 +309,15 @@ bool TGLexer::LexInclude() { return true; } - Dependencies.push_back(IncludedFile); + DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile); + if (Found != Dependencies.end()) { + PrintError(getLoc(), + "File '" + IncludedFile + "' has already been included."); + SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note, + "previously included here"); + return true; + } + Dependencies.insert(std::make_pair(IncludedFile, getLoc())); // Save the line number and lex buffer of the includer. 
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); CurPtr = CurBuf->getBufferStart(); diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h index a0818f9..d1bd70d 100644 --- a/lib/TableGen/TGLexer.h +++ b/lib/TableGen/TGLexer.h @@ -15,9 +15,10 @@ #define TGLEXER_H #include "llvm/Support/DataTypes.h" +#include "llvm/Support/SMLoc.h" #include +#include #include -#include namespace llvm { class MemoryBuffer; @@ -73,9 +74,13 @@ class TGLexer { /// CurBuffer - This is the current buffer index we're lexing from as managed /// by the SourceMgr object. int CurBuffer; + +public: + typedef std::map DependenciesMapTy; +private: /// Dependencies - This is the list of all included files. - std::vector Dependencies; - + DependenciesMapTy Dependencies; + public: TGLexer(SourceMgr &SrcMgr); ~TGLexer() {} @@ -84,7 +89,7 @@ public: return CurCode = LexToken(); } - const std::vector &getDependencies() const { + const DependenciesMapTy &getDependencies() const { return Dependencies; } diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index e55805d..044e3a0 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -96,7 +96,7 @@ public: bool TokError(const Twine &Msg) const { return Error(Lex.getLoc(), Msg); } - const std::vector &getDependencies() const { + const TGLexer::DependenciesMapTy &getDependencies() const { return Lex.getDependencies(); } -- cgit v1.1 From 24b339dcdc2411bc3305e6f58964caa393fd9ea0 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Thu, 7 Feb 2013 04:34:51 +0000 Subject: Make sure we call externals from libraries properly when -static. For example, when we are doing mips16 hard float or soft float. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index f0d6a2a..135df75 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1094,6 +1094,9 @@ class UncondBranch16_pat: def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), (Jal16 tglobaladdr:$dst)>; +def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)), + (Jal16 texternalsym:$dst)>; + // Indirect branch def: Mips16Pat< (brind CPU16Regs:$rs), -- cgit v1.1 From 8ff0631967c64d51b193b862aa0a6f1e8eb06f78 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 7 Feb 2013 10:57:42 +0000 Subject: FDE::dumpHeader(): Forgot to fix one more formatting. It affected bigendian hosts. 
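(Stepping back to the TableGen lexer change above: the point of replacing the plain vector of include names with a map keyed on filename is that the map also remembers the SMLoc of the first inclusion, which is what lets LexInclude attach the "previously included here" note. The same pattern in miniature — names here are hypothetical, not the TGLexer ones:

  #include <cstdio>
  #include <map>
  #include <string>

  typedef std::map<std::string, int> IncludeMapTy;  // filename -> first line

  static bool recordInclude(IncludeMapTy &Seen, const std::string &File,
                            int Line) {
    IncludeMapTy::const_iterator Found = Seen.find(File);
    if (Found != Seen.end()) {
      std::fprintf(stderr, "error: '%s' already included\n", File.c_str());
      std::fprintf(stderr, "note: previously included at line %d\n",
                   Found->second);
      return false;  // caller raises a hard error, as LexInclude does
    }
    Seen.insert(std::make_pair(File, Line));
    return true;
  }

A vector would have needed a linear scan per include; the map gives the duplicate check and the remembered location in one structure, and the dependency writer in Main.cpp iterates the same map.)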
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 6781da6..b70a285 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -108,8 +108,9 @@ public: OS << format("%08x %08x %08x FDE ", (uint32_t)Offset, (uint32_t)Length, LinkedCIEOffset); OS << format("cie=%08x pc=%08x...%08x\n", - (uint32_t)LinkedCIEOffset, (uint32_t)InitialLocation, - InitialLocation + AddressRange); + (uint32_t)LinkedCIEOffset, + (uint32_t)InitialLocation, + (uint32_t)InitialLocation + (uint32_t)AddressRange); OS << "\n"; if (LinkedCIE) { OS << format("%p\n", LinkedCIE); -- cgit v1.1 From b4409610a25506cdef36ac549287fc3240ae0887 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 14:02:27 +0000 Subject: R600: Fix assembly name for SETGT_INT git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index afb30ec..d40e75b 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -771,7 +771,7 @@ def SETE_INT : R600_2OP < >; def SETGT_INT : R600_2OP < - 0x3B, "SGT_INT", + 0x3B, "SETGT_INT", [(set (i32 R600_Reg32:$dst), (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] >; -- cgit v1.1 From 1234c9be42b4ebd4b398df461123205dccf3706c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 14:02:35 +0000 Subject: R600: Add support for SET*_DX10 instructions These instructions compare two floating point values and return an integer true (-1) or false (0) value. When compiling code generated by the Mesa GLSL frontend, the SET*_DX10 instructions save us four instructions for most branch decisions that use floating-point comparisons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174609 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 108 +++++++++++++++++++++++++---------- lib/Target/R600/R600Instructions.td | 52 +++++++++++++++++ 2 files changed, 131 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 85187f8..8f4ec94 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -90,7 +90,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::SELECT_CC); setSchedulingPreference(Sched::VLIW); } @@ -670,9 +672,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const } // Try to lower to a SET* instruction: - // We need all the operands of SELECT_CC to have the same value type, so if - // necessary we need to change True and False to be the same type as LHS and - // RHS, and then convert the result of the select_cc back to the correct type. 
+ // + // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware, + // but for the other case where CompareVT != VT, all operands of + // SELECT_CC need to have the same value type, so we need to change True and + // False to be the same type as LHS and RHS, and then convert the result of + // the select_cc back to the correct type. // Move hardware True/False values to the correct operand. if (isHWTrueValue(False) && isHWFalseValue(True)) { @@ -682,32 +687,17 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const } if (isHWTrueValue(True) && isHWFalseValue(False)) { - if (CompareVT != VT) { - if (VT == MVT::f32 && CompareVT == MVT::i32) { - SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - // Convert integer values of true (-1) and false (0) to fp values of - // true (1.0f) and false (0.0f). - SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, - DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); - } else if (VT == MVT::i32 && CompareVT == MVT::f32) { - SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - // Convert fp values of true (1.0f) and false (0.0f) to integer values - // of true (-1) and false (0). - SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt); - return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg); - } else { - // I don't think there will be any other type pairings. - assert(!"Unhandled operand type parings in SELECT_CC"); - } + if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) { + SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + // Convert integer values of true (-1) and false (0) to fp values of + // true (1.0f) and false (0.0f). + SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); } else { // This SELECT_CC is already legal. return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); @@ -1128,6 +1118,35 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } break; } + + // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> + // (i32 select_cc f32, f32, -1, 0 cc) + // + // Mesa's GLSL frontend generates the above pattern a lot and we can lower + // this to one of the SET*_DX10 instructions. 
+ case ISD::FP_TO_SINT: { + SDValue FNeg = N->getOperand(0); + if (FNeg.getOpcode() != ISD::FNEG) { + return SDValue(); + } + SDValue SelectCC = FNeg.getOperand(0); + if (SelectCC.getOpcode() != ISD::SELECT_CC || + SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS + SelectCC.getOperand(2).getValueType() != MVT::f32 || // True + !isHWTrueValue(SelectCC.getOperand(2)) || + !isHWFalseValue(SelectCC.getOperand(3))) { + return SDValue(); + } + + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0), + SelectCC.getOperand(0), // LHS + SelectCC.getOperand(1), // RHS + DAG.getConstant(-1, MVT::i32), // True + DAG.getConstant(0, MVT::i32), // Flase + SelectCC.getOperand(4)); // CC + + break; + } // Extract_vec (Build_vector) generated by custom lowering // also needs to be customly combined case ISD::EXTRACT_VECTOR_ELT: { @@ -1147,6 +1166,37 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } } } + + case ISD::SELECT_CC: { + // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> + // selectcc x, y, a, b, inv(cc) + SDValue LHS = N->getOperand(0); + if (LHS.getOpcode() != ISD::SELECT_CC) { + return SDValue(); + } + + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + + if (LHS.getOperand(2).getNode() != True.getNode() || + LHS.getOperand(3).getNode() != False.getNode() || + RHS.getNode() != False.getNode() || + cast(N->getOperand(4))->get() != ISD::SETEQ) { + return SDValue(); + } + + ISD::CondCode CCOpcode = cast(LHS->getOperand(4))->get(); + CCOpcode = ISD::getSetCCInverse( + CCOpcode, LHS.getOperand(0).getValueType().isInteger()); + return DAG.getSelectCC(N->getDebugLoc(), + LHS.getOperand(0), + LHS.getOperand(1), + LHS.getOperand(2), + LHS.getOperand(3), + CCOpcode); + + } } return SDValue(); } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index d40e75b..06196e7 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -711,6 +711,34 @@ def SNE : R600_2OP < COND_NE))] >; +def SETE_DX10 : R600_2OP < + 0xC, "SETE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_EQ))] +>; + +def SETGT_DX10 : R600_2OP < + 0xD, "SETGT_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_GT))] +>; + +def SETGE_DX10 : R600_2OP < + 0xE, "SETGE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_GE))] +>; + +def SETNE_DX10 : R600_2OP < + 0xF, "SETNE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_NE))] +>; + def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; @@ -1772,6 +1800,18 @@ def : Pat < (SGE R600_Reg32:$src1, R600_Reg32:$src0) >; +// SETGT_DX10 reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT), + (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_DX10 reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE), + (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0) +>; + // SETGT_INT reverse args def : Pat < (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), @@ -1810,12 +1850,24 @@ def : Pat < (SETE R600_Reg32:$src0, R600_Reg32:$src1) >; +//SETE_DX10 - 'true if ordered' +def : Pat 
< + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO), + (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) +>; + //SNE - 'true if unordered' def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), (SNE R600_Reg32:$src0, R600_Reg32:$src1) >; +//SETNE_DX10 - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO), + (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) +>; + def : Extract_Element ; def : Extract_Element ; def : Extract_Element ; -- cgit v1.1 From 07b59ba69713f9aabb7597193f0df4b02c29d8f9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 14:02:37 +0000 Subject: R600: Consolidate sub register indices. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use sub0-15 everywhere. Patch by: Michel Dänzerr Reviewed-by: Tom Stellard Signed-off-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174610 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 4 +-- lib/Target/R600/AMDGPURegisterInfo.cpp | 32 +++++++++++------------ lib/Target/R600/AMDGPURegisterInfo.td | 7 +---- lib/Target/R600/R600Instructions.td | 36 ++++++++++++------------- lib/Target/R600/R600RegisterInfo.cpp | 8 +++--- lib/Target/R600/R600RegisterInfo.td | 7 +++-- lib/Target/R600/SIInstructions.td | 48 +++++++++++++++++----------------- lib/Target/R600/SIRegisterInfo.td | 24 ++++------------- 8 files changed, 73 insertions(+), 93 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3dee004..6765bc8 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -196,8 +196,8 @@ class Vector_Build ; // bitconvert pattern diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index 7878d60..fe994d2 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -51,22 +51,22 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const { switch(IndirectIndex) { - case 0: return AMDGPU::indirect_0; - case 1: return AMDGPU::indirect_1; - case 2: return AMDGPU::indirect_2; - case 3: return AMDGPU::indirect_3; - case 4: return AMDGPU::indirect_4; - case 5: return AMDGPU::indirect_5; - case 6: return AMDGPU::indirect_6; - case 7: return AMDGPU::indirect_7; - case 8: return AMDGPU::indirect_8; - case 9: return AMDGPU::indirect_9; - case 10: return AMDGPU::indirect_10; - case 11: return AMDGPU::indirect_11; - case 12: return AMDGPU::indirect_12; - case 13: return AMDGPU::indirect_13; - case 14: return AMDGPU::indirect_14; - case 15: return AMDGPU::indirect_15; + case 0: return AMDGPU::sub0; + case 1: return AMDGPU::sub1; + case 2: return AMDGPU::sub2; + case 3: return AMDGPU::sub3; + case 4: return AMDGPU::sub4; + case 5: return AMDGPU::sub5; + case 6: return AMDGPU::sub6; + case 7: return AMDGPU::sub7; + case 8: return AMDGPU::sub8; + case 9: return AMDGPU::sub9; + case 10: return AMDGPU::sub10; + case 11: return AMDGPU::sub11; + case 12: return AMDGPU::sub12; + case 13: return AMDGPU::sub13; + case 14: return AMDGPU::sub14; + case 15: return AMDGPU::sub15; default: llvm_unreachable("indirect index out of range"); } } diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td index 0b4482c..b5aca03 100644 --- 
a/lib/Target/R600/AMDGPURegisterInfo.td +++ b/lib/Target/R600/AMDGPURegisterInfo.td @@ -12,14 +12,9 @@ //===----------------------------------------------------------------------===// let Namespace = "AMDGPU" in { - def sel_x : SubRegIndex; - def sel_y : SubRegIndex; - def sel_z : SubRegIndex; - def sel_w : SubRegIndex; - foreach Index = 0-15 in { - def indirect_#Index : SubRegIndex; + def sub#Index : SubRegIndex; } def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 06196e7..c9d1ffc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -578,13 +578,13 @@ class ExportBufWord1 { multiclass ExportPattern cf_inst> { def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 0, 61, 0, 7, 7, 7, cf_inst, 0) >; def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 0, 61, 7, 0, 7, 7, cf_inst, 0) >; @@ -1868,25 +1868,25 @@ def : Pat < (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) >; -def : Extract_Element ; -def : Extract_Element ; -def : Extract_Element ; -def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; -def : Insert_Element ; -def : Insert_Element ; -def : Insert_Element ; -def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; -def : Extract_Element ; -def : Extract_Element ; -def : Extract_Element ; -def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; -def : Insert_Element ; -def : Insert_Element ; -def : Insert_Element ; -def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; def : Vector_Build ; def : Vector_Build ; diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index cd3fc4a..33e858d 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -84,10 +84,10 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const { switch (Channel) { default: assert(!"Invalid channel index"); return 0; - case 0: return AMDGPU::sel_x; - case 1: return AMDGPU::sel_y; - case 2: return AMDGPU::sel_z; - case 3: return AMDGPU::sel_w; + case 0: return AMDGPU::sub0; + case 1: return AMDGPU::sub1; + case 2: return AMDGPU::sub2; + case 3: return AMDGPU::sub3; } } diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 9a8b859..3812eb7 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -19,7 +19,7 @@ class R600RegWithChan sel, string chan> : class R600Reg_128 subregs, bits<16> encoding> : RegisterWithSubRegs { let Namespace = "AMDGPU"; - let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; + let SubRegIndices = [sub0, sub1, sub2, sub3]; let HWEncoding = encoding; } @@ -126,9 +126,8 @@ class IndirectSuper subregs> : RegisterWithSubRegs { let Namespace = "AMDGPU"; let SubRegIndices = - [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6, - 
indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12, - indirect_13,indirect_14,indirect_15]; + [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15]; } def IndirectSuperReg : IndirectSuper<"Indirect", diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 85bb73a..50605d0 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1215,15 +1215,15 @@ def CLAMP_SI : CLAMP; def FABS_SI : FABS; def FNEG_SI : FNEG; -def : Extract_Element ; -def : Extract_Element ; -def : Extract_Element ; -def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; -def : Insert_Element ; -def : Insert_Element ; -def : Insert_Element ; -def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; def : Vector_Build ; def : Vector_Build ; @@ -1338,22 +1338,22 @@ def : Pat < def : Pat < (int_AMDGPU_cube VReg_128:$src), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_x), - (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_y), - (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_z), - (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_w) + (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub0), + (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub1), + (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub2), + (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub3) >; /********** ================== **********/ diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index c3f1361..a1c7a86 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -1,18 +1,4 @@ -let Namespace = "AMDGPU" in { - def low : SubRegIndex; - def high : SubRegIndex; - - def sub0 : SubRegIndex; - def sub1 : SubRegIndex; - def sub2 : SubRegIndex; - def sub3 : SubRegIndex; - def sub4 : SubRegIndex; - def sub5 : SubRegIndex; - def sub6 : SubRegIndex; - def sub7 : SubRegIndex; -} - class SIReg encoding = 0> : Register { let Namespace = "AMDGPU"; let HWEncoding = encoding; @@ -20,7 +6,7 @@ class SIReg encoding = 0> : Register { class SI_64 subregs, bits<16> encoding> : RegisterWithSubRegs { let Namespace = "AMDGPU"; - let SubRegIndices = [low, high]; + let SubRegIndices = [sub0, sub1]; let HWEncoding = encoding; } @@ -73,12 +59,12 @@ def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers -def SGPR_64 : RegisterTuples<[low, high], +def SGPR_64 : RegisterTuples<[sub0, sub1], [(add (decimate SGPR_32, 2)), 
(add(decimate (rotl SGPR_32, 1), 2))]>; // SGPR 128-bit registers -def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], +def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], [(add (decimate SGPR_32, 4)), (add (decimate (rotl SGPR_32, 1), 4)), (add (decimate (rotl SGPR_32, 2), 4)), @@ -104,12 +90,12 @@ def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers -def VGPR_64 : RegisterTuples<[low, high], +def VGPR_64 : RegisterTuples<[sub0, sub1], [(add VGPR_32), (add (rotl VGPR_32, 1))]>; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], +def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], [(add VGPR_32), (add (rotl VGPR_32, 1)), (add (rotl VGPR_32, 2)), -- cgit v1.1 From d9a8d43ed3e7c6c32f52ab5d0f627f7b1cdb6aac Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 7 Feb 2013 14:54:42 +0000 Subject: FDE::dumpHeader(): Forgot to fix one more formatting, ... take two! Excuse me, I could not test it locally. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174614 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index b70a285..244ff4c 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -106,9 +106,9 @@ public: void dumpHeader(raw_ostream &OS) const { OS << format("%08x %08x %08x FDE ", - (uint32_t)Offset, (uint32_t)Length, LinkedCIEOffset); + (uint32_t)Offset, (uint32_t)Length, (int32_t)LinkedCIEOffset); OS << format("cie=%08x pc=%08x...%08x\n", - (uint32_t)LinkedCIEOffset, + (int32_t)LinkedCIEOffset, (uint32_t)InitialLocation, (uint32_t)InitialLocation + (uint32_t)AddressRange); OS << "\n"; -- cgit v1.1 From 7db31f100793cd4588de8f71b00a26784dd97c86 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Thu, 7 Feb 2013 14:55:16 +0000 Subject: R600/SI: Add pattern for flog2 22 more little piglits with radeonsi. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174615 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 50605d0..d33e113 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -615,7 +615,9 @@ defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] >; defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; -defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>; +defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", + [(set VReg_32:$dst, (flog2 AllReg_32:$src0))] +>; defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", -- cgit v1.1 From 66f535a273e52d56199c7ce8f975796017b6cbb2 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Thu, 7 Feb 2013 16:10:15 +0000 Subject: ARM cost model: Add costs for vector selects Vector selects are cheap on NEON. They get lowered to a vbsl instruction. 
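(Concretely, a vector select in IR such as select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b becomes a single bitwise select once the condition is sign-extended to all-ones/all-zeros lanes: vbsl qc, qa, qb computes (qa & qc) | (qb & ~qc). That is why the cost hook below charges only the type-legalization count, LT.first, per vector select rather than a scalarization cost.)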
radar://13158753 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174631 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index bf83d51..1f91e0e 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -117,6 +117,8 @@ public: unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; /// @} }; @@ -311,3 +313,16 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } + +unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + // On NEON a a vector select gets lowered to vbsl. + if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + std::pair LT = TLI->getTypeLegalizationCost(ValTy); + return LT.first; + } + + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} -- cgit v1.1 From 36ba9091843bd1205fe3499ba4b55bbedc6583c9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 17:02:09 +0000 Subject: R600/SI: Add basic support for more integer vector types. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1i32, v2i32, v8i32 and v16i32. Only add VGPR register classes for integer vector types, to avoid attempts copying from VGPR to SGPR registers, which is not possible. Patch By: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174632 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 6 +++++ lib/Target/R600/AMDGPUInstructions.td | 51 +++++++++++++++++++++++++++++++++++ lib/Target/R600/SIISelLowering.cpp | 7 +++-- lib/Target/R600/SIInstructions.td | 16 ++++++----- lib/Target/R600/SIRegisterInfo.td | 41 +++++++++++++++++++++++++--- 5 files changed, 110 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 715a378..082e734 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -118,6 +118,12 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else if (AMDGPU::SReg_256RegClass.contains(reg)) { isSGPR = true; width = 8; + } else if (AMDGPU::VReg_256RegClass.contains(reg)) { + isSGPR = false; + width = 8; + } else if (AMDGPU::VReg_512RegClass.contains(reg)) { + isSGPR = false; + width = 16; } else { assert(!"Unknown register class"); } diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 6765bc8..0559a5a 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -191,6 +191,19 @@ class Insert_Element ; // Vector Build pattern +class Vector1_Build : Pat < + (vecType (build_vector (elemType elemClass:$src))), + (vecType elemClass:$src) +>; + +class Vector2_Build : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))), + (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) +>; + class Vector_Build : Pat < 
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), @@ -200,6 +213,44 @@ class Vector_Build ; +class Vector8_Build : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), + (elemType elemClass:$sub2), (elemType elemClass:$sub3), + (elemType elemClass:$sub4), (elemType elemClass:$sub5), + (elemType elemClass:$sub6), (elemType elemClass:$sub7))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), + elemClass:$sub2, sub2), elemClass:$sub3, sub3), + elemClass:$sub4, sub4), elemClass:$sub5, sub5), + elemClass:$sub6, sub6), elemClass:$sub7, sub7) +>; + +class Vector16_Build : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), + (elemType elemClass:$sub2), (elemType elemClass:$sub3), + (elemType elemClass:$sub4), (elemType elemClass:$sub5), + (elemType elemClass:$sub6), (elemType elemClass:$sub7), + (elemType elemClass:$sub8), (elemType elemClass:$sub9), + (elemType elemClass:$sub10), (elemType elemClass:$sub11), + (elemType elemClass:$sub12), (elemType elemClass:$sub13), + (elemType elemClass:$sub14), (elemType elemClass:$sub15))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), + elemClass:$sub2, sub2), elemClass:$sub3, sub3), + elemClass:$sub4, sub4), elemClass:$sub5, sub5), + elemClass:$sub6, sub6), elemClass:$sub7, sub7), + elemClass:$sub8, sub8), elemClass:$sub9, sub9), + elemClass:$sub10, sub10), elemClass:$sub11, sub11), + elemClass:$sub12, sub12), elemClass:$sub13, sub13), + elemClass:$sub14, sub14), elemClass:$sub15, sub15) +>; + // bitconvert pattern class BitConvert : Pat < (dt (bitconvert (st rc:$src0))), diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index ef9d17c..afafa8c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -34,8 +34,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); + addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); + addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass); computeRegisterProperties(); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d33e113..dd779cf 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -584,7 +584,7 @@ defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", //defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; //defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", - [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] + [(set (i32 VReg_32:$dst), (fp_to_sint AllReg_32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; ////def V_CVT_F16_F32 : VOP1_F16 
<0x0000000a, "V_CVT_F16_F32", []>; @@ -1000,17 +1000,17 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; -class V_MOV_IMM : InstSI < +class V_MOV_IMM : InstSI < (outs VReg_32:$dst), (ins immType:$src0), "V_MOV_IMM", - [(set VReg_32:$dst, (immNode:$src0))] + [(set VReg_32:$dst, (type immNode:$src0))] >; let isCodeGenOnly = 1, isPseudo = 1 in { -def V_MOV_IMM_I32 : V_MOV_IMM; -def V_MOV_IMM_F32 : V_MOV_IMM; +def V_MOV_IMM_I32 : V_MOV_IMM; +def V_MOV_IMM_F32 : V_MOV_IMM; def S_MOV_IMM_I32 : InstSI < (outs SReg_32:$dst), @@ -1227,8 +1227,12 @@ def : Insert_Element ; def : Insert_Element ; def : Insert_Element ; +def : Vector1_Build ; +def : Vector2_Build ; def : Vector_Build ; -def : Vector_Build ; +def : Vector_Build ; +def : Vector8_Build ; +def : Vector16_Build ; def : BitConvert ; def : BitConvert ; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index a1c7a86..9b483eb 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -101,6 +101,37 @@ def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], (add (rotl VGPR_32, 2)), (add (rotl VGPR_32, 3))]>; +// VGPR 256-bit registers +def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], + [(add VGPR_32), + (add (rotl VGPR_32, 1)), + (add (rotl VGPR_32, 2)), + (add (rotl VGPR_32, 3)), + (add (rotl VGPR_32, 4)), + (add (rotl VGPR_32, 5)), + (add (rotl VGPR_32, 6)), + (add (rotl VGPR_32, 7))]>; + +// VGPR 512-bit registers +def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], + [(add VGPR_32), + (add (rotl VGPR_32, 1)), + (add (rotl VGPR_32, 2)), + (add (rotl VGPR_32, 3)), + (add (rotl VGPR_32, 4)), + (add (rotl VGPR_32, 5)), + (add (rotl VGPR_32, 6)), + (add (rotl VGPR_32, 7)), + (add (rotl VGPR_32, 8)), + (add (rotl VGPR_32, 9)), + (add (rotl VGPR_32, 10)), + (add (rotl VGPR_32, 11)), + (add (rotl VGPR_32, 12)), + (add (rotl VGPR_32, 13)), + (add (rotl VGPR_32, 14)), + (add (rotl VGPR_32, 15))]>; + // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI) @@ -115,7 +146,7 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32, PERSP_SAMPLE_I, PERSP_SAMPLE_J, PERSP_CENTER_I, PERSP_CENTER_J, @@ -136,9 +167,13 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, ) >; -def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; + +def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; + +def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>; +def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; // AllReg_* - A set of all scalar and vector registers of a given width. 
def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>; -- cgit v1.1 From 914e47bb0c5d01e4c129b8753ef315517aae8f2f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 17:02:13 +0000 Subject: R600/SI: Make sample intrinsic address parameter type overloaded. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle vectors of 1 to 16 integers. Change the intrinsic names to prevent the wrong one from being selected at runtime due to the overloading. Patch By: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174633 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 2 +- lib/Target/R600/SIInstructions.td | 52 +++++++++++++++++++++++++-------------- lib/Target/R600/SIIntrinsics.td | 6 ++--- 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 8ff2d6d..9d9f5f6 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -519,7 +519,7 @@ class MIMG_Load_Helper op, string asm> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, GPR4Align:$srsrc, GPR4Align:$ssamp), asm, []> { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index dd779cf..22ec7fe 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1186,33 +1186,49 @@ def : Pat < VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) >; -/* int_SI_sample */ -def : Pat < - (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) ->; +/* int_SI_sample for simple 1D texture lookup */ def : Pat < - (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), - (IMAGE_SAMPLE imm:$writemask, 1, 0, 0, 0, 0, 0, 0, VReg_128:$coord, + (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr), + SReg_256:$rsrc, SReg_128:$sampler, imm), + (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, + (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)), SReg_256:$rsrc, SReg_128:$sampler) >; -/* int_SI_sample_lod */ -def : Pat < - (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) +class SamplePattern : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, imm), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) >; -/* int_SI_sample_bias */ -def : Pat < - (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) +class SampleRectPattern : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), + (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) >; +/* int_SI_sample* for texture lookups consuming more address parameters */ +multiclass SamplePatterns { 
+ def : SamplePattern ; + def : SampleRectPattern ; + + def : SamplePattern ; + + def : SamplePattern ; +} + +defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; + def CLAMP_SI : CLAMP; def FABS_SI : FABS; def FNEG_SI : FNEG; diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 4393144..611b9c4 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -22,11 +22,11 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; def int_SI_wqm : Intrinsic <[], [], []>; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>; def int_SI_sample : Sample; - def int_SI_sample_bias : Sample; - def int_SI_sample_lod : Sample; + def int_SI_sampleb : Sample; + def int_SI_samplel : Sample; /* Interpolation Intrinsics */ -- cgit v1.1 From 64dca86fb4312669086ea8d2df1a13da76b54258 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 17:02:14 +0000 Subject: R600/SI: Use proper instructions for array/shadow samplers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 16 +++++++++++++- lib/Target/R600/SIInstructions.td | 42 ++++++++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c9d1ffc..2eab765 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -399,7 +399,7 @@ class R600_TEX inst, string opName, list pattern, def TEX_SHADOW : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13); + return (TType >= 6 && TType <= 8) || TType == 13; }] >; @@ -410,6 +410,20 @@ def TEX_RECT : PatLeaf< }] >; +def TEX_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 9 || TType == 10 || TType == 15 || TType == 16; + }] +>; + +def TEX_SHADOW_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 11 || TType == 12 || TType == 17; + }] +>; + class EG_CF_RAT cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, dag ins, string asm, list pattern> : InstR600ISA { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 22ec7fe..dd5bb42 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -511,12 +511,12 @@ def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">; def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>; +def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; //def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ 
<"IMAGE_SAMPLE_C_D", 0x0000002a>; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>; -//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>; +def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; +def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -1214,14 +1214,50 @@ class SampleRectPattern; +class SampleArrayPattern : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +class SampleShadowPattern : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +class SampleShadowArrayPattern : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + /* int_SI_sample* for texture lookups consuming more address parameters */ multiclass SamplePatterns { def : SamplePattern ; def : SampleRectPattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; } defm : SamplePatterns; -- cgit v1.1 From 96a6555b5706af59e408bb190c3685f0f15bc2a9 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Thu, 7 Feb 2013 19:05:21 +0000 Subject: Loop Vectorizer: Refactor Memory Cost Computation We don't want too many classes in a pass and the classes obscure the details. I was going a little overboard with object modeling here. Replace classes by generic code that handles both loads and stores. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 232 +++++++---------------------- 1 file changed, 52 insertions(+), 180 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 7d696a7..7e97c8f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -560,11 +560,6 @@ public: /// \return information about the register usage of the loop. RegisterUsage calculateRegisterUsage(); - /// A helper function for converting Scalar types to vector types. - /// If the incoming type is void, we return void. If the VF is 1, we return - /// the scalar type. - static Type* ToVectorTy(Type *Scalar, unsigned VF); - private: /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -576,6 +571,11 @@ private: /// width. Vector width of one means scalar. 
unsigned getInstructionCost(Instruction *I, unsigned VF); + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + /// Returns whether the instruction is a load or store and will be a emitted /// as a vector operation. bool isConsecutiveLoadOrStore(Instruction *I); @@ -594,177 +594,6 @@ private: DataLayout *DL; }; -/// A helper class to compute the cost of a memory operation (load or store). -class MemoryCostComputation { -public: - /// \brief This function computes the cost of a memory instruction, either of - /// a load or of a store. - /// \param Inst a pointer to a LoadInst or a StoreInst. - /// \param VF the vector factor to use. - /// \param TTI the target transform information used to obtain costs. - /// \param Legality the legality class used by this function to obtain the - /// access strid of the memory operation. - /// \returns the estimated cost of the memory instruction. - static unsigned computeCost(Value *Inst, unsigned VF, - const TargetTransformInfo &TTI, - LoopVectorizationLegality *Legality) { - if (StoreInst *Store = dyn_cast(Inst)) - return StoreCost(Store, VF, TTI, Legality).cost(); - - return LoadCost(cast(Inst), VF, TTI, Legality).cost(); - } - -private: - /// An helper class to compute the cost of vectorize memory instruction. It is - /// subclassed by load and store cost computation classes who fill the fields - /// with values that require knowing about the concrete Load/StoreInst class. - class MemoryOpCost { - public: - /// \return the cost of vectorizing the memory access instruction. - unsigned cost() { - if (VectorFactor == 1) - return TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, AddressSpace); - - if ((Stride = Legality->isConsecutivePtr(PointerOperand))) - return costOfWideMemInst(); - - return costOfScalarizedMemInst(); - } - - protected: - /// The pointer operand of the memory instruction. - Value *PointerOperand; - /// The scalar type of the memory access. - Type *ScalarTy; - /// The vector type of the memory access. - Type *VectorTy; - /// The vector factor by which we vectorize. - unsigned VectorFactor; - /// The stride of the memory access. - int Stride; - /// The alignment of the memory operation. - unsigned Alignment; - /// The address space of the memory operation. - unsigned AddressSpace; - /// The opcode of the memory instruction. - unsigned Opcode; - /// Are we looking at a load or store instruction. - bool IsLoadInst; - const TargetTransformInfo &TTI; - LoopVectorizationLegality *Legality; - - /// Constructs a helper class to compute the cost of a memory instruction. - /// \param VF the vector factor (the length of the vector). - /// \param TI the target transform information used by this class to obtain - /// costs. - /// \param L the legality class used by this class to obtain the access - /// stride of the memory operation. - MemoryOpCost(unsigned VF, const TargetTransformInfo &TI, - LoopVectorizationLegality *L) : - VectorFactor(VF), TTI(TI), Legality(L) { - } - - private: - /// \return the cost if the memory instruction is scalarized. 
- unsigned costOfScalarizedMemInst() { - unsigned Cost = 0; - Cost += costOfExtractFromPointerVector(); - Cost += costOfExtractFromValueVector(); - Cost += VectorFactor * TTI.getMemoryOpCost(Opcode, ScalarTy, Alignment, - AddressSpace); - Cost += costOfInsertIntoValueVector(); - return Cost; - } - - /// \return the cost of extracting the pointers out of the pointer vector. - unsigned costOfExtractFromPointerVector() { - Type *PtrTy = getVectorizedPointerOperandType(); - return costOfVectorInstForAllElems(Instruction::ExtractElement, PtrTy); - } - - /// \return the cost for extracting values out of the value vector if the - /// memory instruction is a store and zero otherwise. - unsigned costOfExtractFromValueVector() { - if (IsLoadInst) - return 0; - - return costOfVectorInstForAllElems(Instruction::ExtractElement, VectorTy); - } - - /// \return the cost of insert values into the value vector if the memory - /// instruction was a load and zero otherwise. - unsigned costOfInsertIntoValueVector() { - if (!IsLoadInst) - return 0; - - return costOfVectorInstForAllElems(Instruction::InsertElement, VectorTy); - } - - /// \return the cost of a vector memory instruction. - unsigned costOfWideMemInst() { - unsigned Cost = TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, - AddressSpace); - // Reverse stride. - if (Stride < 0) - Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, - 0); - return Cost; - } - - /// Helper function to compute the cost of one insert- or extractelement - /// instruction per vector element. - /// \param VecOpcode the vector instruction opcode (Can be either - /// InsertElement or an ExtractElement). - /// \param Ty the vector type the vector instruction operates on. - /// \return the cost of an vector instruction applied to each vector - /// element. - unsigned costOfVectorInstForAllElems(unsigned VecOpcode, Type *Ty) { - unsigned Cost = 0; - for (unsigned i = 0; i < VectorFactor; ++i) - Cost += TTI.getVectorInstrCost(VecOpcode, Ty, i); - return Cost; - } - - /// \return a vectorized type for the pointer operand. - Type * getVectorizedPointerOperandType() { - Type *PointerOpTy = PointerOperand->getType(); - return LoopVectorizationCostModel::ToVectorTy(PointerOpTy, VectorFactor); - } - }; - - /// Implementation of the abstract memory cost base class. Sets field of base - /// class whose value depends on the LoadInst. - class LoadCost : public MemoryOpCost { - public: - LoadCost(LoadInst *Load, unsigned VF, const TargetTransformInfo &TI, - LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) { - PointerOperand = Load->getPointerOperand(); - ScalarTy = Load->getType(); - VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF); - Alignment = Load->getAlignment(); - AddressSpace = Load->getPointerAddressSpace(); - Opcode = Load->getOpcode(); - IsLoadInst = true; - } - }; - - /// Implementation of the abstract memory cost base class. Sets field of base - /// class whose value depends on the StoreInst. - class StoreCost : public MemoryOpCost { - public: - StoreCost(StoreInst *Store, unsigned VF, const TargetTransformInfo &TI, - LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) { - PointerOperand = Store->getPointerOperand(); - ScalarTy = Store->getValueOperand()->getType(); - VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF); - Alignment = Store->getAlignment(); - AddressSpace = Store->getPointerAddressSpace(); - Opcode = Store->getOpcode(); - IsLoadInst = false; - } - }; -}; - /// The LoopVectorize Pass. 
struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid @@ -3268,11 +3097,54 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); } - case Instruction::Load: - case Instruction::Store: { - return MemoryCostComputation::computeCost(I, VF, TTI, Legal); - } + case Instruction::Store: + case Instruction::Load: { + StoreInst *SI = dyn_cast(I); + LoadInst *LI = dyn_cast(I); + Type *ValTy = (SI ? SI->getValueOperand()->getType() : + LI->getType()); + VectorTy = ToVectorTy(ValTy, VF); + + unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment(); + unsigned AS = SI ? SI->getPointerAddressSpace() : + LI->getPointerAddressSpace(); + Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand(); + if (VF == 1) + return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + + // Scalarized loads/stores. + int Stride = Legal->isConsecutivePtr(Ptr); + bool Reverse = Stride < 0; + if (0 == Stride) { + unsigned Cost = 0; + // The cost of extracting from the value vector and pointer vector. + Type *PtrTy = ToVectorTy(Ptr->getType(), VF); + for (unsigned i = 0; i < VF; ++i) { + // The cost of extracting the pointer operand. + Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i); + // In case of STORE, the cost of ExtractElement from the vector. + // In case of LOAD, the cost of InsertElement into the returned + // vector. + Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement : + Instruction::InsertElement, + VectorTy, i); + } + + // The cost of the scalar stores. + Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), + Alignment, AS); + return Cost; + } + + // Wide load/stores. + unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, + Alignment, AS); + if (Reverse) + Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, + VectorTy, 0); + return Cost; + } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: -- cgit v1.1 From f89de816ae5cf2d0dad7869882dd626532b934ef Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Thu, 7 Feb 2013 19:26:05 +0000 Subject: Identify and simplify idempotent intrinsics. Test case included. 
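An illustrative aside (standalone C++, not part of the patch): "idempotent" here
means f(f(x)) == f(x), which is what makes it safe for the new SimplifyIntrinsic
below to fold a nested call down to the inner call. The libm counterparts of the
six handled intrinsics show the identity directly:

  #include <assert.h>
  #include <math.h>

  int main() {
    const double Vals[] = { -2.7, -0.5, 0.0, 0.5, 2.7 };
    for (unsigned i = 0; i != sizeof(Vals) / sizeof(Vals[0]); ++i) {
      double X = Vals[i];
      assert(fabs(fabs(X)) == fabs(X));                 // @llvm.fabs
      assert(floor(floor(X)) == floor(X));              // @llvm.floor
      assert(ceil(ceil(X)) == ceil(X));                 // @llvm.ceil
      assert(trunc(trunc(X)) == trunc(X));              // @llvm.trunc
      assert(rint(rint(X)) == rint(X));                 // @llvm.rint
      assert(nearbyint(nearbyint(X)) == nearbyint(X));  // @llvm.nearbyint
    }
    return 0;
  }

So a call whose single argument is already a call to the same intrinsic
simplifies to that inner call, which is exactly the value SimplifyIntrinsic
returns.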
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174650 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 34ff64d..4a3c74e 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2925,6 +2925,37 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } +static bool IsIdempotent(Intrinsic::ID ID) { + switch (ID) { + default: return false; + + // Unary idempotent: f(f(x)) = f(x) + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + return true; + } +} + +template +static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + // Perform idempotent optimizations + if (!IsIdempotent(IID)) + return 0; + + // Unary Ops + if (std::distance(ArgBegin, ArgEnd) == 1) + if (IntrinsicInst *II = dyn_cast(*ArgBegin)) + if (II->getIntrinsicID() == IID) + return II; + + return 0; +} + template static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, const Query &Q, unsigned MaxRecurse) { @@ -2941,6 +2972,11 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, if (!F) return 0; + if (unsigned IID = F->getIntrinsicID()) + if (Value *Ret = + SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse)) + return Ret; + if (!canConstantFoldCallTo(F)) return 0; -- cgit v1.1 From b0b1a7feeedf1b93b23fcd6e675cbced0295f632 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 19:39:34 +0000 Subject: R600: Add an explicit default processor This is for the case when no processor is passed to the backend. This prevents the '' is not a recognized processor for this target (ignoring processor) warning from being generated by clang. 
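A rough sketch of the mechanism (standalone C++, simplified and with invented
helper names — the real lookup lives in the generated subtarget code): the
driver's CPU string is matched against the processor table, and any unmatched
string, including the empty one, produces the diagnostic; adding a "" entry
makes the no-processor case resolve silently:

  #include <stdio.h>
  #include <string.h>

  enum { FeatureR600ALUInst = 1 << 0 };

  struct ProcEntry { const char *Name; unsigned Features; };
  static const ProcEntry Procs[] = {
    { "",      FeatureR600ALUInst },  // the new default entry
    { "r600",  FeatureR600ALUInst },
    { "rv710", 0 },
  };

  static unsigned lookupFeatures(const char *CPU) {
    for (unsigned i = 0; i != sizeof(Procs) / sizeof(Procs[0]); ++i)
      if (strcmp(Procs[i].Name, CPU) == 0)
        return Procs[i].Features;
    // The warning the patch avoids:
    fprintf(stderr, "'%s' is not a recognized processor for this target "
                    "(ignoring processor)\n", CPU);
    return 0;
  }

  int main() { return lookupFeatures("") == FeatureR600ALUInst ? 0 : 1; }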
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/Processors.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 3dc1ecd..868810c 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -13,6 +13,7 @@ class Proc Features> : Processor; +def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>; def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; def : Proc<"rv710", R600_EG_Itin, []>; def : Proc<"rv730", R600_EG_Itin, []>; -- cgit v1.1 From 75ddd4cd4c726b3bf93b2a83b51d95a505ce0739 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 19:39:38 +0000 Subject: R600/SI: add proper 64bit immediate support v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: rebased on current upstream Patch by: Christian König Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174652 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 10 ++++++++++ lib/Target/R600/SIInstructions.td | 19 ++++++++----------- lib/Target/R600/SILowerLiteralConstants.cpp | 1 - 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 9d9f5f6..83ee2cf 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -38,6 +38,16 @@ def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> >; +// Transformation function, extract the lower 32bit of a 64bit immediate +def LO32 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); +}]>; + +// Transformation function, extract the upper 32bit of a 64bit immediate +def HI32 : SDNodeXFormgetTargetConstant(N->getZExtValue() >> 32, MVT::i32); +}]>; + class InstSI pattern> : AMDGPUInst { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index dd5bb42..7d35561 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1019,19 +1019,16 @@ def S_MOV_IMM_I32 : InstSI < [(set SReg_32:$dst, (imm:$src0))] >; -// i64 immediates aren't really supported in hardware, but LLVM will use the i64 -// type for indices on load and store instructions. The pattern for -// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits, -// which the hardware can handle. 
-def S_MOV_IMM_I64 : InstSI < - (outs SReg_64:$dst), - (ins i64imm:$src0), - "S_MOV_IMM_I64 $dst, $src0", - [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))] ->; - } // End isCodeGenOnly, isPseudo = 1 +// i64 immediates aren't supported in hardware, split it into two 32bit values +def : Pat < + (i64 imm:$imm), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_MOV_IMM_I32 (LO32 imm:$imm)), sub0), + (S_MOV_IMM_I32 (HI32 imm:$imm)), sub1) +>; + class SI_LOAD_LITERAL : Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { diff --git a/lib/Target/R600/SILowerLiteralConstants.cpp b/lib/Target/R600/SILowerLiteralConstants.cpp index c0411e9..6f5fd36 100644 --- a/lib/Target/R600/SILowerLiteralConstants.cpp +++ b/lib/Target/R600/SILowerLiteralConstants.cpp @@ -73,7 +73,6 @@ bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; case AMDGPU::S_MOV_IMM_I32: - case AMDGPU::S_MOV_IMM_I64: case AMDGPU::V_MOV_IMM_F32: case AMDGPU::V_MOV_IMM_I32: { unsigned MovOpcode; -- cgit v1.1 From fc207d8f57d3bd27aa0dc2dd40ecd344229477d3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 19:39:40 +0000 Subject: R600/SI: simplify and fix SMRD encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _SGPR variants where wrong. Patch by: Christian König Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174653 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCodeEmitter.h | 4 - lib/Target/R600/AMDILISelDAGToDAG.cpp | 53 ------------- lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 4 - lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 34 -------- lib/Target/R600/SIInstrInfo.td | 90 ++++++++-------------- lib/Target/R600/SIInstructions.td | 39 +++++++++- 6 files changed, 70 insertions(+), 154 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUCodeEmitter.h b/lib/Target/R600/AMDGPUCodeEmitter.h index 84f3588..5d61cd0 100644 --- a/lib/Target/R600/AMDGPUCodeEmitter.h +++ b/lib/Target/R600/AMDGPUCodeEmitter.h @@ -38,10 +38,6 @@ public: unsigned OpNo) const { return 0; } - virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) - const { - return 0; - } }; } // End namespace llvm diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 2699409..a88e8c7 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -72,8 +72,6 @@ private: bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, SDValue& Offset); - bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); - bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -527,43 +525,6 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, return false; } -bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, - SDValue& Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) { - return false; - } - - - if (Addr.getOpcode() == ISD::ADD) { - bool Match = false; - - // Find the base ptr and the offset - for (unsigned i = 0; i < Addr.getNumOperands(); i++) { - SDValue Arg = Addr.getOperand(i); - ConstantSDNode * 
OffsetNode = dyn_cast(Arg); - // This arg isn't a constant so it must be the base PTR. - if (!OffsetNode) { - Base = Addr.getOperand(i); - continue; - } - // Check if the constant argument fits in 8-bits. The offset is in bytes - // so we need to convert it to dwords. - if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) { - Match = true; - Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, - MVT::i32); - } - } - return Match; - } - - // Default case, no offset - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) { ConstantSDNode * IMMOffset; @@ -591,20 +552,6 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, return true; } -bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, - SDValue& Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress || - Addr.getOpcode() != ISD::ADD) { - return false; - } - - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - - return true; -} - bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset) { ConstantSDNode *C; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h index 9d0d6cf..3b3816a 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -49,10 +49,6 @@ public: SmallVectorImpl &Fixups) const { return 0; } - virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { - return 0; - } }; } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index b4bdb25..8acc78f 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -92,10 +92,6 @@ public: virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixup) const; - /// \brief Encoding for SMRD indexed loads - virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixup) const; - /// \brief Post-Encoder method for VOP instructions virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const; @@ -183,36 +179,6 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, return GPRAlign(MI, OpNo, 2); } -#define SMRD_OFFSET_MASK 0xff -#define SMRD_IMM_SHIFT 8 -#define SMRD_SBASE_MASK 0x3f -#define SMRD_SBASE_SHIFT 9 -/// This function is responsibe for encoding the offset -/// and the base ptr for SMRD instructions it should return a bit string in -/// this format: -/// -/// OFFSET = bits{7-0} -/// IMM = bits{8} -/// SBASE = bits{14-9} -/// -uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixup) const { - uint32_t Encoding; - - const MCOperand &OffsetOp = MI.getOperand(OpNo + 1); - - //XXX: Use this function for SMRD loads with register offsets - assert(OffsetOp.isImm()); - - Encoding = - (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK) - | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit - | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) - ; - - return Encoding; -} - //===----------------------------------------------------------------------===// // Post Encoder Callbacks 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 83ee2cf..13cf9f7 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -38,6 +38,11 @@ def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> >; +// SMRD takes a 64bit memory address and can only add an 32bit offset +def SIadd64bit32bit : SDNode<"ISD::ADD", + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]> +>; + // Transformation function, extract the lower 32bit of a 64bit immediate def LO32 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); @@ -48,6 +53,20 @@ def HI32 : SDNodeXFormgetTargetConstant(N->getZExtValue() >> 32, MVT::i32); }]>; +def IMM8bitDWORD : ImmLeaf < + i32, [{ + return (Imm & ~0x3FC) == 0; + }], SDNodeXFormgetTargetConstant( + N->getZExtValue() >> 2, MVT::i32); + }]> +>; + +def IMM12bit : ImmLeaf < + i16, + [{return isUInt<12>(Imm);}] +>; + class InstSI pattern> : AMDGPUInst { @@ -79,49 +98,16 @@ class SIOperand : Operand { let MIOperandInfo = opInfo; } -def IMM16bit : ImmLeaf < - i16, - [{return isInt<16>(Imm);}] ->; - -def IMM8bit : ImmLeaf < - i32, - [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] ->; - -def IMM12bit : ImmLeaf < - i16, - [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] ->; - -def IMM32bitIn64bit : ImmLeaf < - i64, - [{return isInt<32>(Imm);}] ->; - class GPR4Align : Operand { let EncoderMethod = "GPR4AlignEncode"; let MIOperandInfo = (ops rc:$reg); } -class GPR2Align : Operand { +class GPR2Align : Operand { let EncoderMethod = "GPR2AlignEncode"; let MIOperandInfo = (ops rc:$reg); } -def SMRDmemrr : Operand { - let MIOperandInfo = (ops SReg_64, SReg_32); - let EncoderMethod = "GPR2AlignEncode"; -} - -def SMRDmemri : Operand { - let MIOperandInfo = (ops SReg_64, i32imm); - let EncoderMethod = "SMRDmemriEncode"; -} - -def ADDR_Reg : ComplexPattern; -def ADDR_Offset8 : ComplexPattern; - let Uses = [EXEC] in { def EXP : Enc64< @@ -272,17 +258,15 @@ class MUBUF op, dag outs, dag ins, string asm, list pattern> : } // End Uses = [EXEC] -class SMRD op, dag outs, dag ins, string asm, list pattern> : - Enc32 { +class SMRD op, bits<1> imm, dag outs, dag ins, string asm, + list pattern> : Enc32 { bits<7> SDST; - bits<15> PTR; - bits<8> OFFSET = PTR{7-0}; - bits<1> IMM = PTR{8}; - bits<6> SBASE = PTR{14-9}; + bits<6> SBASE; + bits<8> OFFSET; let Inst{7-0} = OFFSET; - let Inst{8} = IMM; + let Inst{8} = imm; let Inst{14-9} = SBASE; let Inst{21-15} = SDST; let Inst{26-22} = op; @@ -573,29 +557,23 @@ class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBU let mayLoad = 0; } -multiclass SMRD_Helper op, string asm, RegisterClass dstClass, - ValueType vt> { +multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { def _IMM : SMRD < - op, - (outs dstClass:$dst), - (ins SMRDmemri:$src0), - asm, - [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] + op, 1, + (outs dstClass:$dst), + (ins GPR2Align:$sbase, i32imm:$offset), + asm, + [] >; def _SGPR : SMRD < - op, + op, 0, (outs dstClass:$dst), - (ins SMRDmemrr:$src0), + (ins GPR2Align:$sbase, SReg_32:$soff), asm, - [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] + [] >; } -multiclass SMRD_32 op, string asm, RegisterClass dstClass> { - defm _F32 : SMRD_Helper ; - defm _I32 : SMRD_Helper ; -} - include "SIInstrFormats.td" include "SIInstructions.td" diff --git 
a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 7d35561..fc98e4f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -461,11 +461,13 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM //def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; //def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; -defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>; +let mayLoad = 1 in { + +defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; //def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; //def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; //def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; //def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; @@ -473,6 +475,8 @@ defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32 //def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; //def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +} // mayLoad = 1 + //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; @@ -1419,4 +1423,33 @@ def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)), (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, 0, 0, 0, 0)>; +/********** ================== **********/ +/********** SMRD Patterns **********/ +/********** ================== **********/ + +multiclass SMRD_Pattern { + // 1. Offset as 8bit DWORD immediate + def : Pat < + (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)), + (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset)) + >; + + // 2. Offset loaded in an 32bit SGPR + def : Pat < + (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)), + (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_IMM_I32 imm:$offset))) + >; + + // 3. No offset at all + def : Pat < + (constant_load SReg_64:$sbase), + (vt (Instr_IMM SReg_64:$sbase, 0)) + >; +} + +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; + } // End isSI predicate -- cgit v1.1 From e7384db6f6e0d84b7a8a7f629b9e7d07a459de93 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 19:39:42 +0000 Subject: R600/SI: Add pattern for mul. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 20 more little piglits with radeonsi. 
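A standalone aside on why one pattern suffices (not part of the patch):
V_MUL_LO_I32 produces the low 32 bits of the full product, and a plain i32
'mul' has exactly that wrap-around semantics — the low half is also identical
whether the operands are interpreted as signed or unsigned:

  #include <assert.h>
  #include <stdint.h>

  int main() {
    uint32_t A = 0xDEADBEEFu, B = 0x12345678u;
    uint64_t Wide = (uint64_t)A * (uint64_t)B;  // full 64-bit product
    assert(A * B == (uint32_t)Wide);            // i32 mul == low half
    return 0;
  }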
Patch by: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174654 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index fc98e4f..a09f243 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -932,6 +932,10 @@ def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +def : Pat < + (mul AllReg_32:$src0, VReg_32:$src1), + (V_MUL_LO_I32 AllReg_32:$src0, VReg_32:$src1, (IMPLICIT_DEF), 0, 0, 0, 0) +>; def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -- cgit v1.1 From 60fc58262f4dba20c1ea5ede63e5a2c322489d32 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 19:39:43 +0000 Subject: R600/SI: Handle VGPR64 destination in copyPhysReg(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows nexuiz to run with radeonsi. Patch by: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174655 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index b40337d..f6d00f5 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -41,7 +41,15 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // never be necessary. assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); - if (AMDGPU::SReg_64RegClass.contains(DestReg)) { + if (AMDGPU::VReg_64RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || + AMDGPU::SReg_64RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0)) + .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc)) + .addReg(DestReg, RegState::Define | RegState::Implicit); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1)) + .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc)); + } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); -- cgit v1.1 From 184f5c1545e06a99951f14d846a1d853ff19a2b8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 19:39:45 +0000 Subject: R600/SI: cleanup VGPR encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all the unused code. 
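The trick, sketched standalone (illustration only, constants taken from the
diff below): VGPR encodings now carry a marker in bit 8 (the
"let HWEncoding{8} = 1" in SIRegisterInfo.td), so one generic
MRI.getEncodingValue() lookup serves both register files, and consumers simply
mask with 0xff to recover the raw register number — the "& 0xff" added in
AMDGPUAsmPrinter.cpp and in GPRAlign():

  #include <assert.h>

  int main() {
    unsigned SGPR5Enc = 5;              // SGPR: encoding is the register number
    unsigned VGPR5Enc = (1u << 8) | 5;  // VGPR: HWEncoding{8} set on top of it
    assert((SGPR5Enc & 0xff) == 5);     // masking recovers the hardware number
    assert((VGPR5Enc & 0xff) == 5);
    assert(((VGPR5Enc >> 8) & 1) == 1); // bit 8 distinguishes VGPR from SGPR
    return 0;
  }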
Patch by: Christian König Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174656 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 2 +- lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 141 +---------------------- lib/Target/R600/SIInstrInfo.h | 12 +- lib/Target/R600/SIInstrInfo.td | 35 +----- lib/Target/R600/SIRegisterInfo.td | 4 +- 5 files changed, 16 insertions(+), 178 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 082e734..89d1307 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -127,7 +127,7 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else { assert(!"Unknown register class"); } - hwReg = RI->getEncodingValue(reg); + hwReg = RI->getEncodingValue(reg) & 0xff; maxUsed = hwReg + width - 1; if (isSGPR) { MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 8acc78f..fbdf77e 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -24,35 +24,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" -#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1)) -#define SI_INSTR_FLAGS_ENCODING_MASK 0xf - -// These must be kept in sync with SIInstructions.td and also the -// InstrEncodingInfo array in SIInstrInfo.cpp. -// -// NOTE: This enum is only used to identify the encoding type within LLVM, -// the actual encoding type that is part of the instruction format is different -namespace SIInstrEncodingType { - enum Encoding { - EXP = 0, - LDS = 1, - MIMG = 2, - MTBUF = 3, - MUBUF = 4, - SMRD = 5, - SOP1 = 6, - SOP2 = 7, - SOPC = 8, - SOPK = 9, - SOPP = 10, - VINTRP = 11, - VOP1 = 12, - VOP2 = 13, - VOP3 = 14, - VOPC = 15 - }; -} - using namespace llvm; namespace { @@ -91,25 +62,6 @@ public: /// \brief Encoding for when 4 consectuive registers are used virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixup) const; - - /// \brief Post-Encoder method for VOP instructions - virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const; - -private: - - /// \returns this SIInstrEncodingType for this instruction. - unsigned getEncodingType(const MCInst &MI) const; - - /// \brief Get then size in bytes of this instructions encoding. 
- unsigned getEncodingBytes(const MCInst &MI) const; - - /// \returns the hardware encoding for a register - unsigned getRegBinaryCode(unsigned reg) const; - - /// \brief Generated function that returns the hardware encoding for - /// a register - unsigned getHWRegNum(unsigned reg) const; - }; } // End anonymous namespace @@ -124,7 +76,7 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups) const { uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups); - unsigned bytes = getEncodingBytes(MI); + unsigned bytes = MCII.get(MI.getOpcode()).getSize(); for (unsigned i = 0; i < bytes; i++) { OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); } @@ -134,7 +86,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { if (MO.isReg()) { - return getRegBinaryCode(MO.getReg()); + return MRI.getEncodingValue(MO.getReg()); } else if (MO.isImm()) { return MO.getImm(); } else if (MO.isFPImm()) { @@ -163,9 +115,8 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const { - unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg()); - return regCode >> shift; - return 0; + unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg()); + return (regCode & 0xff) >> shift; } unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, unsigned OpNo , @@ -178,87 +129,3 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, SmallVectorImpl &Fixup) const { return GPRAlign(MI, OpNo, 2); } - -//===----------------------------------------------------------------------===// -// Post Encoder Callbacks -//===----------------------------------------------------------------------===// - -uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{ - unsigned encodingType = getEncodingType(MI); - unsigned numSrcOps; - unsigned vgprBitOffset; - - if (encodingType == SIInstrEncodingType::VOP3) { - numSrcOps = 3; - vgprBitOffset = 32; - } else { - numSrcOps = 1; - vgprBitOffset = 0; - } - - // Add one to skip over the destination reg operand. - for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) { - const MCOperand &MO = MI.getOperand(opIdx); - if (MO.isReg()) { - unsigned reg = MI.getOperand(opIdx).getReg(); - if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) || - AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) { - Value |= (VGPR_BIT(opIdx)) << vgprBitOffset; - } - } else if (MO.isFPImm()) { - union { - float f; - uint32_t i; - } Imm; - // XXX: Not all instructions can use inline literals - // XXX: We should make sure this is a 32-bit constant - Imm.f = MO.getFPImm(); - Value |= ((uint64_t)Imm.i) << 32; - } - } - return Value; -} - -//===----------------------------------------------------------------------===// -// Encoding helper functions -//===----------------------------------------------------------------------===// - -unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const { - return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK; -} - -unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const { - - // These instructions aren't real instructions with an encoding type, so - // we need to manually specify their size. 
- switch (MI.getOpcode()) { - default: break; - case AMDGPU::SI_LOAD_LITERAL_I32: - case AMDGPU::SI_LOAD_LITERAL_F32: - return 4; - } - - unsigned encoding_type = getEncodingType(MI); - switch (encoding_type) { - case SIInstrEncodingType::EXP: - case SIInstrEncodingType::LDS: - case SIInstrEncodingType::MUBUF: - case SIInstrEncodingType::MTBUF: - case SIInstrEncodingType::MIMG: - case SIInstrEncodingType::VOP3: - return 8; - default: - return 4; - } -} - - -unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const { - switch (reg) { - case AMDGPU::M0: return 124; - case AMDGPU::SREG_LIT_0: return 128; - case AMDGPU::SI_LITERAL_CONSTANT: return 255; - default: return MRI.getEncodingValue(reg); - } -} - diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index e4de4b8..a65f7b6 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,12 +35,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - /// \returns the encoding type of this instruction. - unsigned getEncodingType(const MachineInstr &MI) const; - - /// \returns the size of this instructions encoding in number of bytes. - unsigned getEncodingBytes(const MachineInstr &MI) const; - virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const; @@ -81,9 +75,9 @@ public: namespace SIInstrFlags { enum Flags { // First 4 bits are the instruction encoding - VM_CNT = 1 << 4, - EXP_CNT = 1 << 5, - LGKM_CNT = 1 << 6 + VM_CNT = 1 << 0, + EXP_CNT = 1 << 1, + LGKM_CNT = 1 << 2 }; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 13cf9f7..b983e8a 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -70,27 +70,27 @@ def IMM12bit : ImmLeaf < class InstSI pattern> : AMDGPUInst { - field bits<4> EncodingType = 0; field bits<1> VM_CNT = 0; field bits<1> EXP_CNT = 0; field bits<1> LGKM_CNT = 0; - let TSFlags{3-0} = EncodingType; - let TSFlags{4} = VM_CNT; - let TSFlags{5} = EXP_CNT; - let TSFlags{6} = LGKM_CNT; + let TSFlags{0} = VM_CNT; + let TSFlags{1} = EXP_CNT; + let TSFlags{2} = LGKM_CNT; } class Enc32 pattern> : InstSI { field bits<32> Inst; + let Size = 4; } class Enc64 pattern> : InstSI { field bits<64> Inst; + let Size = 8; } class SIOperand : Operand { @@ -137,7 +137,6 @@ def EXP : Enc64< let Inst{47-40} = VSRC1; let Inst{55-48} = VSRC2; let Inst{63-56} = VSRC3; - let EncodingType = 0; //SIInstrEncodingType::EXP let EXP_CNT = 1; } @@ -172,7 +171,6 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : let Inst{47-40} = VDATA; let Inst{52-48} = SRSRC; let Inst{57-53} = SSAMP; - let EncodingType = 2; //SIInstrEncodingType::MIMG let VM_CNT = 1; let EXP_CNT = 1; @@ -210,7 +208,6 @@ class MTBUF op, dag outs, dag ins, string asm, list pattern> : let Inst{54} = SLC; let Inst{55} = TFE; let Inst{63-56} = SOFFSET; - let EncodingType = 3; //SIInstrEncodingType::MTBUF let VM_CNT = 1; let EXP_CNT = 1; @@ -248,7 +245,6 @@ class MUBUF op, dag outs, dag ins, string asm, list pattern> : let Inst{54} = SLC; let Inst{55} = TFE; let Inst{63-56} = SOFFSET; - let EncodingType = 4; //SIInstrEncodingType::MUBUF let VM_CNT = 1; let EXP_CNT = 1; @@ -271,7 +267,6 @@ class SMRD op, bits<1> imm, dag outs, dag ins, string asm, let Inst{21-15} = SDST; let Inst{26-22} = op; let Inst{31-27} = 0x18; //encoding - let EncodingType = 5; //SIInstrEncodingType::SMRD let LGKM_CNT = 1; } @@ -286,7 +281,6 @@ class SOP1 op, dag outs, dag ins, string asm, list pattern> : let Inst{15-8} = op; let Inst{22-16} = 
SDST; let Inst{31-23} = 0x17d; //encoding; - let EncodingType = 6; //SIInstrEncodingType::SOP1 let mayLoad = 0; let mayStore = 0; @@ -305,7 +299,6 @@ class SOP2 op, dag outs, dag ins, string asm, list pattern> : let Inst{22-16} = SDST; let Inst{29-23} = op; let Inst{31-30} = 0x2; // encoding - let EncodingType = 7; // SIInstrEncodingType::SOP2 let mayLoad = 0; let mayStore = 0; @@ -322,7 +315,6 @@ class SOPC op, dag outs, dag ins, string asm, list pattern> : let Inst{15-8} = SSRC1; let Inst{22-16} = op; let Inst{31-23} = 0x17e; - let EncodingType = 8; // SIInstrEncodingType::SOPC let DisableEncoding = "$dst"; let mayLoad = 0; @@ -340,7 +332,6 @@ class SOPK op, dag outs, dag ins, string asm, list pattern> : let Inst{22-16} = SDST; let Inst{27-23} = op; let Inst{31-28} = 0xb; //encoding - let EncodingType = 9; // SIInstrEncodingType::SOPK let mayLoad = 0; let mayStore = 0; @@ -358,7 +349,6 @@ class SOPP op, dag ins, string asm, list pattern> : Enc32 < let Inst{15-0} = SIMM16; let Inst{22-16} = op; let Inst{31-23} = 0x17f; // encoding - let EncodingType = 10; // SIInstrEncodingType::SOPP let mayLoad = 0; let mayStore = 0; @@ -381,7 +371,6 @@ class VINTRP op, dag outs, dag ins, string asm, list pattern> : let Inst{17-16} = op; let Inst{25-18} = VDST; let Inst{31-26} = 0x32; // encoding - let EncodingType = 11; // SIInstrEncodingType::VINTRP let neverHasSideEffects = 1; let mayLoad = 1; @@ -399,9 +388,6 @@ class VOP1 op, dag outs, dag ins, string asm, list pattern> : let Inst{24-17} = VDST; let Inst{31-25} = 0x3f; //encoding - let EncodingType = 12; // SIInstrEncodingType::VOP1 - let PostEncoderMethod = "VOPPostEncode"; - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -420,9 +406,6 @@ class VOP2 op, dag outs, dag ins, string asm, list pattern> : let Inst{30-25} = op; let Inst{31} = 0x0; //encoding - let EncodingType = 13; // SIInstrEncodingType::VOP2 - let PostEncoderMethod = "VOPPostEncode"; - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -451,9 +434,6 @@ class VOP3 op, dag outs, dag ins, string asm, list pattern> : let Inst{60-59} = OMOD; let Inst{63-61} = NEG; - let EncodingType = 14; // SIInstrEncodingType::VOP3 - let PostEncoderMethod = "VOPPostEncode"; - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -480,9 +460,6 @@ class VOP3b op, dag outs, dag ins, string asm, list pattern> : let Inst{60-59} = OMOD; let Inst{63-61} = NEG; - let EncodingType = 14; // SIInstrEncodingType::VOP3 - let PostEncoderMethod = "VOPPostEncode"; - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -499,8 +476,6 @@ class VOPC op, dag ins, string asm, list pattern> : let Inst{24-17} = op; let Inst{31-25} = 0x3e; - let EncodingType = 15; //SIInstrEncodingType::VOPC - let PostEncoderMethod = "VOPPostEncode"; let DisableEncoding = "$dst"; let mayLoad = 0; let mayStore = 0; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 9b483eb..809d503 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -12,7 +12,9 @@ class SI_64 subregs, bits<16> encoding> : RegisterWith class SGPR_32 num, string name> : SIReg; -class VGPR_32 num, string name> : SIReg; +class VGPR_32 num, string name> : SIReg { + let HWEncoding{8} = 1; +} // Special Registers def VCC : SIReg<"VCC", 106>; -- cgit v1.1 From 0c66403efdf88ff4f247b6a9f45339bb3a893235 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 7 Feb 2013 19:48:00 +0000 Subject: [mips] Add definition of JALR instruction which has two register operands. 
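In MIPS assembly the familiar one-operand form has always been shorthand for
the two-operand form with $ra as the implicit link register, which the new
InstAlias entries below preserve; for example:

  jalr  $t9          # shorthand: return address is written to $ra
  jalr  $ra, $t9     # equivalent explicit two-operand form
  jalr  $s0, $t9     # now also expressible: link into $s0 instead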
Change the original JALR instruction with one register operand to be a pseudo-instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174657 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 3 +++ lib/Target/Mips/MipsInstrFormats.td | 3 ++- lib/Target/Mips/MipsInstrInfo.td | 11 +++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index db92c64..494ba87 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -170,6 +170,7 @@ def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>; } let DecoderNamespace = "Mips64" in def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM; +def JALR64Pseudo : JumpLinkRegPseudo; def TAILCALL64_R : JumpFR, MTLO_FM<8>, IsTailCall; let DecoderNamespace = "Mips64" in { @@ -329,6 +330,8 @@ def : InstAlias<"not $rt, $rs", (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>, Requires<[HasMips64]>; def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>; +def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>, + Requires<[HasMips64]>; def : InstAlias<"daddu $rs, $rt, $imm", (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), 1>; diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 6977e45..ee432c8 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -367,6 +367,7 @@ class LUI_FM { } class JALR_FM { + bits<5> rd; bits<5> rs; bits<32> Inst; @@ -374,7 +375,7 @@ class JALR_FM { let Inst{31-26} = 0; let Inst{25-21} = rs; let Inst{20-16} = 0; - let Inst{15-11} = 31; + let Inst{15-11} = rd; let Inst{10-6} = 0; let Inst{5-0} = 9; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 7e33cb1..c85b547 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -565,9 +565,14 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in { let DecoderMethod = "DecodeJumpTarget"; } + class JumpLinkRegPseudo: + PseudoSE<(outs), (ins RC:$rs), [(MipsJmpLink RC:$rs)], IIBranch>, + PseudoInstExpansion<(JALRInst RetReg, RC:$rs)>; + class JumpLinkReg: - InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), - [(MipsJmpLink RC:$rs)], IIBranch, FrmR>; + InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"), + [], IIBranch, FrmR>; class BGEZAL_FT : InstSE<(outs), (ins RO:$rs, brtarget:$offset), @@ -865,6 +870,7 @@ def BAL_BR: BAL_FT, BAL_FM; def JAL : JumpLink<"jal">, FJ<3>; def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM; +def JALRPseudo : JumpLinkRegPseudo; def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>; def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>; def TAILCALL : JumpFJ, FJ<2>, IsTailCall; @@ -972,6 +978,7 @@ def : InstAlias<"and $rs, $rt, $imm", (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>, Requires<[NotMips64]>; +def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>; def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>; def : InstAlias<"neg $rt, $rs", -- cgit v1.1 From 8f7dc823bfda3efede14a3cf9c294a2da3ad8777 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 7 Feb 2013 20:33:57 +0000 Subject: Constrain PowerPC autovectorization to fix bug 15041. Certain vector operations don't vectorize well with the current PowerPC implementation. 
Element insert/extract performs poorly without VSX support because Altivec requires going through memory. SREM, UREM, and VSELECT all produce bad scalar code. There's a lot of work to do for the cost model before autovectorization will be tuned well, and this is not an attempt to address the larger problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174660 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 88b63e6..f57d764 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -194,6 +194,25 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { assert(Val->isVectorTy() && "This must be a vector type"); + const unsigned Awful = 1000; + + // Vector element insert/extract with Altivec is very expensive. + // Until VSX is available, avoid vectorizing loops that require + // these operations. + if (Opcode == ISD::EXTRACT_VECTOR_ELT || + Opcode == ISD::INSERT_VECTOR_ELT) + return Awful; + + // We don't vectorize SREM/UREM so well. Constrain the vectorizer + // for those as well. + if (Opcode == ISD::SREM || Opcode == ISD::UREM) + return Awful; + + // VSELECT is not yet implemented, leading to use of insert/extract + // and ISEL, hence not a good idea. + if (Opcode == ISD::VSELECT) + return Awful; + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); } -- cgit v1.1 From df5b3305c71b21eb3ace56922d75fb777ac25c17 Mon Sep 17 00:00:00 2001 From: Will Schmidt Date: Thu, 7 Feb 2013 20:43:33 +0000 Subject: [patch] bug 15055 Add Unistd.h to OProfileWrapper.cpp Add #include to OProfileWrapper.cpp. This provides the declarations for 'read' and 'close' that are otherwise missing, and result in 'error: was not declared in this scope'. This matches the issue as reported in bug 15055 "Can no longer compile LLVM with --with-oprofile" git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174661 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp index d67f537..7c0d395 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace { -- cgit v1.1 From 7a0103c30d2f423c78527aeeeebc7915e3bc9ee9 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 7 Feb 2013 21:19:50 +0000 Subject: "Clean up" line section symbol emission by emitting the section syms before constructing the compile units so we're not emitting section references to sections not there already. 
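A minimal model of the ordering hazard (standalone C++, all names invented and
heavily simplified): the compile unit DIE records a reference against the
line-section symbol, so that symbol must exist before the first unit is
constructed — hence emitting the section labels at the start of
beginModule()/collectLegacyDebugInfo() rather than waiting for endModule():

  #include <assert.h>
  #include <string>

  struct DwarfModel {
    std::string LineSectionSym;  // empty until emitSectionLabels() runs
    void emitSectionLabels() { LineSectionSym = "section_line"; }
    void constructCompileUnit() {
      // DW_AT_stmt_list needs a real label to point at.
      assert(!LineSectionSym.empty() && "section symbol not emitted yet");
    }
  };

  int main() {
    DwarfModel M;
    M.emitSectionLabels();     // now precedes CU construction, as in the patch
    M.constructCompileUnit();
    return 0;
  }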
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174663 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 18 +++++++++++------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3b8bebe..5d19a8d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -165,7 +165,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; @@ -677,7 +677,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); else NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, Asm->GetTempSymbol("section_line")); + LineTableStartSym, DwarfLineSectionSym); if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -782,6 +782,9 @@ bool DwarfDebug::collectLegacyDebugInfo(const Module *M) { } if (!HasDebugInfo) return false; + // Emit initial sections so we can refer to them later. + emitSectionLabels(); + // Create all the compile unit DIEs. for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), E = DbgFinder.compile_unit_end(); I != E; ++I) @@ -819,6 +822,9 @@ void DwarfDebug::beginModule() { // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (CU_Nodes) { + // Emit initial sections so we can reference labels later. + emitSectionLabels(); + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); CompileUnit *CU = constructCompileUnit(CUNode); @@ -951,9 +957,6 @@ void DwarfDebug::endModule() { // Finalize the debug info for the module. finalizeModuleInfo(); - // Emit initial sections. - emitSectionLabels(); - if (!useSplitDwarf()) { // Emit all the DIEs into a debug info section. emitDebugInfo(); @@ -1775,7 +1778,8 @@ void DwarfDebug::emitSectionLabels() { if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) emitSectionSym(Asm, MacroInfo); - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); + DwarfLineSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); emitSectionSym(Asm, TLOF.getDwarfLocSection()); emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = @@ -2489,7 +2493,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // compile unit in debug_line section. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("section_line")); + DwarfLineSectionSym); else NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index fb90202..7d57a82 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -386,7 +386,7 @@ class DwarfDebug { // section offsets and are created by EmitSectionLabels. 
MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym; + MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; -- cgit v1.1 From e0501e866ca15601f4da2dc6b5572dc2de53abc8 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 7 Feb 2013 21:19:56 +0000 Subject: Formatting. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174664 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 5465af6..fea057a 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -272,8 +272,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { // total length, the 2 bytes for the version, and these 4 bytes for the // length of the prologue. MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym, - (4 + 2 + 4)), - 4, 0); + (4 + 2 + 4)), 4, 0); // Parameters of the state machine, are next. MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1); -- cgit v1.1 From 5dd41c95f3075fc5c01cfb6822a66ac584fcc8c7 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 7 Feb 2013 21:32:32 +0000 Subject: [mips] Make Filler a class and reduce indentation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174666 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDelaySlotFiller.cpp | 72 +++++++++++++++++---------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 041a9d0..cf0d9db 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -45,15 +45,8 @@ static cl::opt SkipDelaySlotFiller( cl::Hidden); namespace { - struct Filler : public MachineFunctionPass { - typedef MachineBasicBlock::instr_iterator InstrIter; - typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter; - - TargetMachine &TM; - const TargetInstrInfo *TII; - InstrIter LastFiller; - - static char ID; + class Filler : public MachineFunctionPass { + public: Filler(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } @@ -61,7 +54,6 @@ namespace { return "Mips Delay Slot Filler"; } - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &F) { if (SkipDelaySlotFiller) return false; @@ -73,6 +65,12 @@ namespace { return Changed; } + private: + typedef MachineBasicBlock::instr_iterator InstrIter; + typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter; + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool isDelayFiller(MachineBasicBlock &MBB, InstrIter candidate); @@ -96,7 +94,11 @@ namespace { findDelayInstr(MachineBasicBlock &MBB, InstrIter slot, InstrIter &Filler); + TargetMachine &TM; + const TargetInstrInfo *TII; + InstrIter LastFiller; + static char ID; }; char Filler::ID = 0; } // end of anonymous namespace @@ -108,31 +110,33 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; LastFiller = MBB.instr_end(); - for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I) - if (I->hasDelaySlot()) { - ++FilledSlots; - Changed = true; - InstrIter InstrWithSlot = I; - InstrIter D; - - // Delay slot filling is disabled at -O0. 
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && - findDelayInstr(MBB, I, D)) { - MBB.splice(llvm::next(I), &MBB, D); - ++UsefulSlots; - } else - BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); - - // Record the filler instruction that filled the delay slot. - // The instruction after it will be visited in the next iteration. - LastFiller = ++I; - - // Bundle the delay slot filler to InstrWithSlot so that the machine - // verifier doesn't expect this instruction to be a terminator. - MIBundleBuilder(MBB, InstrWithSlot, llvm::next(LastFiller)); - } - return Changed; + for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I) { + if (!I->hasDelaySlot()) + continue; + ++FilledSlots; + Changed = true; + InstrIter InstrWithSlot = I; + InstrIter D; + + // Delay slot filling is disabled at -O0. + if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && + findDelayInstr(MBB, I, D)) { + MBB.splice(llvm::next(I), &MBB, D); + ++UsefulSlots; + } else + BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + + // Record the filler instruction that filled the delay slot. + // The instruction after it will be visited in the next iteration. + LastFiller = ++I; + + // Bundle the delay slot filler to InstrWithSlot so that the machine + // verifier doesn't expect this instruction to be a terminator. + MIBundleBuilder(MBB, InstrWithSlot, llvm::next(LastFiller)); + } + + return Changed; } /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay -- cgit v1.1 From 4fae15e664290a974d44145f4a178ec06e8538e9 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 7 Feb 2013 22:34:07 +0000 Subject: fix 80-col violation and fix the docs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174671 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 7e97c8f..32fcb8f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -102,8 +102,11 @@ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, /// We don't vectorize loops with a known constant trip count below this number. static cl::opt -TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, - cl::desc("The minimum trip count in the loops to vectorize.")); +TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), + cl::Hidden, + cl::desc("Don't vectorize loops with a constant " + "trip count that is smaller than this " + "value.")); /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; @@ -531,7 +534,8 @@ public: /// This method checks every power of two up to VF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - VectorizationFactor selectVectorizationFactor(bool OptForSize, unsigned UserVF); + VectorizationFactor selectVectorizationFactor(bool OptForSize, + unsigned UserVF); /// \return The size (in bits) of the widest type in the code that /// needs to be vectorized. 
We ignore values that remain scalar such as -- cgit v1.1 From 3854a5d90fee52af1065edbed34521fff6cdc18d Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Thu, 7 Feb 2013 23:01:35 +0000 Subject: Have InstCombine call SimplifyCall when handling calls. Test case included. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 64cd1bd..cb9ba44 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CallSite.h" @@ -210,6 +211,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return &CI; } + CallSite CS(&CI); + if (Value *V = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + TD)) + return ReplaceInstUsesWith(CI, V); + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); -- cgit v1.1 From ea007fa608146acdfc5b889a1d86a4ac7c528bf7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 8 Feb 2013 00:52:31 +0000 Subject: Use ParseFnAttributeValuePairs instead of ParseOptionalFuncAttrs The functionality of ParseOptionalFuncAttrs was there in ParseFnAttributeValuePairs. So just use that instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174686 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 137 ++++++++++++++++----------------------------- lib/AsmParser/LLParser.h | 3 +- 2 files changed, 50 insertions(+), 90 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 22c21c6..ba3db34 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -752,7 +752,7 @@ bool LLParser::ParseUnnamedAttrGrp() { if (ParseToken(lltok::equal, "expected '=' here") || ParseToken(lltok::kw_attributes, "expected 'attributes' keyword here") || ParseToken(lltok::lbrace, "expected '{' here") || - ParseAttributeValuePairs(ForwardRefAttrBuilder[VarID]) || + ParseFnAttributeValuePairs(ForwardRefAttrBuilder[VarID], true) || ParseToken(lltok::rbrace, "expected end of attribute group")) return true; @@ -762,13 +762,18 @@ bool LLParser::ParseUnnamedAttrGrp() { return false; } -/// ParseAttributeValuePairs +/// ParseFnAttributeValuePairs /// ::= <attr> | <attr> '=' <value> -bool LLParser::ParseAttributeValuePairs(AttrBuilder &B) { +bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, bool inAttrGrp) { + bool HaveError = false; + + B.clear(); + while (true) { lltok::Kind Token = Lex.getKind(); switch (Token) { default: + if (!inAttrGrp) return HaveError; return Error(Lex.getLoc(), "unterminated attribute group"); case lltok::rbrace: // Finished. return false; @@ -789,31 +794,38 @@ bool LLParser::ParseAttributeValuePairs(AttrBuilder &B) { // Target-independent attributes: case lltok::kw_align: { + // As a hack, we allow "align 2" on functions as a synonym for "alignstack + // 2".
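+ // (In an attribute group the value is written "align = N" and parsed with ParseToken/ParseUInt32 below; on a function proper it is the bare "align N" form, handled by ParseOptionalAlignment.)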
unsigned Alignment; - if (ParseToken(lltok::equal, "expected '=' here") || - ParseUInt32(Alignment)) - return true; + if (inAttrGrp) { + if (ParseToken(lltok::equal, "expected '=' here") || + ParseUInt32(Alignment)) + return true; + } else { + if (ParseOptionalAlignment(Alignment)) + return true; + } B.addAlignmentAttr(Alignment); - break; + continue; } case lltok::kw_alignstack: { unsigned Alignment; - if (ParseToken(lltok::equal, "expected '=' here") || - ParseUInt32(Alignment)) - return true; + if (inAttrGrp) { + if (ParseToken(lltok::equal, "expected '=' here") || + ParseUInt32(Alignment)) + return true; + } else { + if (ParseOptionalStackAlignment(Alignment)) + return true; + } B.addStackAlignmentAttr(Alignment); - break; + continue; } case lltok::kw_address_safety: B.addAttribute(Attribute::AddressSafety); break; case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; - case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; - case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; - case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; - case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; - case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; @@ -825,13 +837,28 @@ bool LLParser::ParseAttributeValuePairs(AttrBuilder &B) { case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; - case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; - case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; - case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; + + // Error handling. + case lltok::kw_inreg: + case lltok::kw_signext: + case lltok::kw_zeroext: + HaveError |= + Error(Lex.getLoc(), + "invalid use of attribute on a function"); + break; + case lltok::kw_byval: + case lltok::kw_nest: + case lltok::kw_noalias: + case lltok::kw_nocapture: + case lltok::kw_sret: + HaveError |= + Error(Lex.getLoc(), + "invalid use of parameter-only attribute on a function"); + break; } Lex.Lex(); @@ -1016,72 +1043,6 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { ParseToken(lltok::rparen, "expected ')' in address space"); } -/// ParseOptionalFuncAttrs - Parse a potentially empty list of function attributes. -bool LLParser::ParseOptionalFuncAttrs(AttrBuilder &B) { - bool HaveError = false; - - B.clear(); - - while (1) { - lltok::Kind Token = Lex.getKind(); - switch (Token) { - default: // End of attributes. 
- return HaveError; - case lltok::kw_alignstack: { - unsigned Alignment; - if (ParseOptionalStackAlignment(Alignment)) - return true; - B.addStackAlignmentAttr(Alignment); - continue; - } - case lltok::kw_align: { - // As a hack, we allow "align 2" on functions as a synonym for "alignstack - // 2". - unsigned Alignment; - if (ParseOptionalAlignment(Alignment)) - return true; - B.addAlignmentAttr(Alignment); - continue; - } - case lltok::kw_address_safety: B.addAttribute(Attribute::AddressSafety); break; - case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; - case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; - case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; - case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; - case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; - case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; - case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; - case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; - case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; - case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; - case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; - case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; - case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; - case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; - case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; - case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; - case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; - case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; - case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; - - // Error handling. - case lltok::kw_zeroext: - case lltok::kw_signext: - case lltok::kw_inreg: - HaveError |= Error(Lex.getLoc(), "invalid use of attribute on a function"); - break; - case lltok::kw_sret: case lltok::kw_noalias: - case lltok::kw_nocapture: case lltok::kw_byval: - case lltok::kw_nest: - HaveError |= - Error(Lex.getLoc(), "invalid use of parameter-only attribute on a function"); - break; - } - - Lex.Lex(); - } -} - /// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes. 
bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { bool HaveError = false; @@ -2904,7 +2865,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || - ParseOptionalFuncAttrs(FuncAttrs) || + ParseFnAttributeValuePairs(FuncAttrs, false) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || ParseOptionalAlignment(Alignment) || @@ -3435,7 +3396,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseOptionalFuncAttrs(FnAttrs) || + ParseFnAttributeValuePairs(FnAttrs, false) || ParseToken(lltok::kw_to, "expected 'to' in invoke") || ParseTypeAndBasicBlock(NormalBB, PFS) || ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") || @@ -3841,7 +3802,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseOptionalFuncAttrs(FnAttrs)) + ParseFnAttributeValuePairs(FnAttrs, false)) return true; // If RetType is a non-function pointer type, then this is the short syntax diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 131331a..3cc2a8d 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -194,7 +194,6 @@ namespace llvm { bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalAddrSpace(unsigned &AddrSpace); - bool ParseOptionalFuncAttrs(AttrBuilder &B); bool ParseOptionalParamAttrs(AttrBuilder &B); bool ParseOptionalReturnAttrs(AttrBuilder &B); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); @@ -240,7 +239,7 @@ namespace llvm { bool ParseMDNodeID(MDNode *&Result); bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); bool ParseUnnamedAttrGrp(); - bool ParseAttributeValuePairs(AttrBuilder &B); + bool ParseFnAttributeValuePairs(AttrBuilder &B, bool inAttrGrp); // Type Parsing. bool ParseType(Type *&Result, bool AllowVoid = false); -- cgit v1.1 From d2bcda7706cc2a6caf3b4b304b39a9649c703278 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 8 Feb 2013 01:55:39 +0000 Subject: Revert "Have InstCombine call SimplifyCall when handling calls. Test case included." This reverts commit 3854a5d90fee52af1065edbed34521fff6cdc18d. This causes a clang unit test to hang: vtable-available-externally.cpp.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cb9ba44..64cd1bd 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -13,7 +13,6 @@ #include "InstCombine.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CallSite.h" @@ -211,11 +210,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return &CI; } - CallSite CS(&CI); - if (Value *V = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - TD)) - return ReplaceInstUsesWith(CI, V); - IntrinsicInst *II = dyn_cast(&CI); if (!II) return visitCallSite(&CI); -- cgit v1.1 From 61b97b8c1721ba45e5c10ca307ceebe1efdf72a9 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 8 Feb 2013 03:57:41 +0000 Subject: When Mips16 frames grow large, the immediate field may exceed the maximum allowed size for the instruction. This code uses RegScavenger to fix this. We sometimes need 2 registers for Mips16 so we must handle things differently than how register scavenger is normally used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174696 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 75 ++++++++++++++++++++++++++++++++-- lib/Target/Mips/Mips16InstrInfo.h | 12 +++--- lib/Target/Mips/Mips16RegisterInfo.cpp | 13 +++++- 3 files changed, 90 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 117faea..9f27ac3 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -306,11 +307,79 @@ void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, /// This function generates the sequence of instructions needed to get the /// result of adding register REG and immediate IMM. unsigned -Mips16InstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, +Mips16InstrInfo::loadImmediate(unsigned FrameReg, + int64_t Imm, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, DebugLoc DL, - unsigned *NewImm) const { + unsigned &NewImm) const { + // + // given original instruction is: + // Instr rx, T[offset] where offset is too big. + // + // lo = offset & 0xFFFF + // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF; + // + // let T = temporary register + // li T, hi + // shl T, 16 + // add T, Rx, T + // + RegScavenger rs; + int32_t lo = Imm & 0xFFFF; + int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF; + NewImm = lo; + unsigned Reg =0; + unsigned SpReg = 0; + rs.enterBasicBlock(&MBB); + rs.forward(II); + // + // we use T0 for the first register, if we need to save something away. + // we use T1 for the second register, if we need to save something away. 
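+ // (If no MIPS16 register is free we park V0, and V1 when a second temporary is needed, in T0/T1 via copyPhysReg, and copy the saved values back once the sequence below is finished.)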
+ // + unsigned FirstRegSaved =0, SecondRegSaved=0; + unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0; + + Reg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass); + if (Reg == 0) { + FirstRegSaved = Reg = Mips::V0; + FirstRegSavedTo = Mips::T0; + copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true); + } + else + rs.setUsed(Reg); + BuildMI(MBB, II, DL, get(Mips::LiRxImmX16), Reg).addImm(hi); + BuildMI(MBB, II, DL, get(Mips::SllX16), Reg).addReg(Reg). + addImm(16); + if (FrameReg == Mips::SP) { + SpReg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass); + if (SpReg == 0) { + if (Reg != Mips::V1) { + SecondRegSaved = SpReg = Mips::V1; + SecondRegSavedTo = Mips::T1; + } + else { + SecondRegSaved = SpReg = Mips::V0; + SecondRegSavedTo = Mips::T0; + } + copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true); + } + else + rs.setUsed(SpReg); - return 0; + copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false); + BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(SpReg) + .addReg(Reg); + } + else + BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(FrameReg) + .addReg(Reg, RegState::Kill); + if (FirstRegSaved || SecondRegSaved) { + II = llvm::next(II); + if (FirstRegSaved) + copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true); + if (SecondRegSaved) + copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true); + } + return Reg; } unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 3704e25..26a5a5e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -77,12 +77,14 @@ public: void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - /// Emit a series of instructions to load an immediate. If NewImm is a - /// non-NULL parameter, the last instruction is not emitted, but instead - /// its immediate operand is returned in NewImm. - unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + /// Emit a series of instructions to load an immediate. + // This is to adjust some FrameReg. 
We return the new register to be used + // in place of FrameReg and the adjusted immediate field (&NewImm) + // + unsigned loadImmediate(unsigned FrameReg, + int64_t Imm, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, DebugLoc DL, - unsigned *NewImm) const; + unsigned &NewImm) const; private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index c2e09a7..a181a34 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -1,3 +1,4 @@ + //===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// // // The LLVM Compiler Infrastructure @@ -12,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "Mips16RegisterInfo.h" +#include "Mips16InstrInfo.h" #include "Mips.h" #include "Mips16InstrInfo.h" #include "MipsAnalyzeImmediate.h" @@ -23,6 +25,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" @@ -140,6 +143,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. int64_t Offset; + bool IsKill = false; Offset = SPOffset + (int64_t)StackSize; Offset += MI.getOperand(OpNo + 1).getImm(); @@ -148,9 +152,14 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, if (!MI.isDebugValue() && ( ((FrameReg != Mips::SP) && !isInt<16>(Offset)) || ((FrameReg == Mips::SP) && !isInt<15>(Offset)) )) { - llvm_unreachable("frame offset does not fit in instruction"); + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned NewImm; + FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm); + Offset = SignExtend64<16>(NewImm); + IsKill = true; } - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); -- cgit v1.1 From baad55c3ab85d57003e9711084f1a95fcab76fdf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 8 Feb 2013 06:32:06 +0000 Subject: Parse the attribute group reference on a function. Attribute references are of this form: define void @foo() #0 #1 #2 { ... } Parse them for function attributes. If there's more than one reference, then they are merged together. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174697 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 95 +++++++++++++++++++++++++++++++++++++++++++--- lib/AsmParser/LLParser.h | 7 +++- 2 files changed, 94 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index ba3db34..6f076e2 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -65,6 +65,64 @@ bool LLParser::ValidateEndOfModule() { ForwardRefInstMetadata.clear(); } + // Handle any function attribute group forward references. 
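+ // For each object that referenced attribute groups (#N), merge the builders of all referenced groups into one AttrBuilder, then fold the result into the function, call, or invoke attribute set below.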
+ for (std::map >::iterator + I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end(); + I != E; ++I) { + Value *V = I->first; + std::vector &Vec = I->second; + AttrBuilder B; + + for (std::vector::iterator VI = Vec.begin(), VE = Vec.end(); + VI != VE; ++VI) + B.merge(NumberedAttrBuilders[*VI]); + + if (Function *Fn = dyn_cast(V)) { + AttributeSet AS = Fn->getAttributes(); + AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); + AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, + AS.getFnAttributes()); + + FnAttrs.merge(B); + + // If the alignment was parsed as an attribute, move to the alignment + // field. + if (FnAttrs.hasAlignmentAttr()) { + Fn->setAlignment(FnAttrs.getAlignment()); + FnAttrs.removeAttribute(Attribute::Alignment); + } + + AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, + AttributeSet::get(Context, + AttributeSet::FunctionIndex, + FnAttrs)); + Fn->setAttributes(AS); + } else if (CallInst *CI = dyn_cast(V)) { + AttributeSet AS = CI->getAttributes(); + AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); + AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, + AS.getFnAttributes()); + + AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, + AttributeSet::get(Context, + AttributeSet::FunctionIndex, + FnAttrs)); + CI->setAttributes(AS); + } else if (InvokeInst *II = dyn_cast(V)) { + AttributeSet AS = II->getAttributes(); + AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); + AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, + AS.getFnAttributes()); + + AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, + AttributeSet::get(Context, + AttributeSet::FunctionIndex, + FnAttrs)); + II->setAttributes(AS); + } else { + llvm_unreachable("invalid object with forward attribute group reference"); + } + } // If there are entries in ForwardRefBlockAddresses at this point, they are // references after the function was defined. Resolve those now. @@ -747,16 +805,17 @@ bool LLParser::ParseUnnamedAttrGrp() { assert(Lex.getKind() == lltok::AttrGrpID); LocTy AttrGrpLoc = Lex.getLoc(); unsigned VarID = Lex.getUIntVal(); + std::vector unused; Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' here") || ParseToken(lltok::kw_attributes, "expected 'attributes' keyword here") || ParseToken(lltok::lbrace, "expected '{' here") || - ParseFnAttributeValuePairs(ForwardRefAttrBuilder[VarID], true) || + ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true) || ParseToken(lltok::rbrace, "expected end of attribute group")) return true; - if (!ForwardRefAttrBuilder[VarID].hasAttributes()) + if (!NumberedAttrBuilders[VarID].hasAttributes()) return Error(AttrGrpLoc, "attribute group has no attributes"); return false; @@ -764,7 +823,9 @@ bool LLParser::ParseUnnamedAttrGrp() { /// ParseFnAttributeValuePairs /// ::= | '=' -bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, bool inAttrGrp) { +bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, + std::vector &FwdRefAttrGrps, + bool inAttrGrp) { bool HaveError = false; B.clear(); @@ -779,6 +840,22 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, bool inAttrGrp) { // Finished. return false; + case lltok::AttrGrpID: { + // Allow a function to reference an attribute group: + // + // define void @foo() #1 { ... 
} + if (inAttrGrp) + HaveError |= + Error(Lex.getLoc(), + "cannot have an attribute group reference in an attribute group"); + + unsigned AttrGrpNum = Lex.getUIntVal(); + if (inAttrGrp) break; + + // Save the reference to the attribute group. We'll fill it in later. + FwdRefAttrGrps.push_back(AttrGrpNum); + break; + } // Target-dependent attributes: case lltok::StringConstant: { std::string Attr = Lex.getStrVal(); @@ -2856,6 +2933,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { SmallVector ArgList; bool isVarArg; AttrBuilder FuncAttrs; + std::vector FwdRefAttrGrps; std::string Section; unsigned Alignment; std::string GC; @@ -2865,7 +2943,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || - ParseFnAttributeValuePairs(FuncAttrs, false) || + ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || ParseOptionalAlignment(Alignment) || @@ -2965,6 +3043,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setAlignment(Alignment); Fn->setSection(Section); if (!GC.empty()) Fn->setGC(GC.c_str()); + ForwardRefAttrGroups[Fn] = FwdRefAttrGrps; // Add all of the arguments we parsed to the function. Function::arg_iterator ArgIt = Fn->arg_begin(); @@ -3384,6 +3463,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); AttrBuilder RetAttrs, FnAttrs; + std::vector FwdRefAttrGrps; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3396,7 +3476,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseFnAttributeValuePairs(FnAttrs, false) || + ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false) || ParseToken(lltok::kw_to, "expected 'to' in invoke") || ParseTypeAndBasicBlock(NormalBB, PFS) || ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") || @@ -3471,6 +3551,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args); II->setCallingConv(CC); II->setAttributes(PAL); + ForwardRefAttrGroups[II] = FwdRefAttrGrps; Inst = II; return false; } @@ -3789,6 +3870,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { AttrBuilder RetAttrs, FnAttrs; + std::vector FwdRefAttrGrps; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3802,7 +3884,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseFnAttributeValuePairs(FnAttrs, false)) + ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false)) return true; // If RetType is a non-function pointer type, then this is the short syntax @@ -3874,6 +3956,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, CI->setTailCall(isTail); CI->setCallingConv(CC); CI->setAttributes(PAL); + ForwardRefAttrGroups[CI] = FwdRefAttrGrps; Inst = CI; return false; } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 3cc2a8d..42cdbd5 100644 --- 
a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -126,7 +126,8 @@ namespace llvm { ForwardRefBlockAddresses; // Attribute builder reference information. - std::map<unsigned, AttrBuilder> ForwardRefAttrBuilder; + std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups; + std::map<unsigned, AttrBuilder> NumberedAttrBuilders; public: LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : @@ -239,7 +240,9 @@ namespace llvm { bool ParseMDNodeID(MDNode *&Result); bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); bool ParseUnnamedAttrGrp(); - bool ParseFnAttributeValuePairs(AttrBuilder &B, bool inAttrGrp); + bool ParseFnAttributeValuePairs(AttrBuilder &B, + std::vector<unsigned> &FwdRefAttrGrps, + bool inAttrGrp); // Type Parsing. bool ParseType(Type *&Result, bool AllowVoid = false); -- cgit v1.1 From bcf60de5eb05e255f0c1bcf7ce26125c57026b65 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Fri, 8 Feb 2013 12:58:29 +0000 Subject: Test Commit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174709 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 32fcb8f..91d5659 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -32,7 +32,7 @@ // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. // // Variable uniformity checks are inspired by: -// Karrenberg, R. and Hack, S. Whole Function Vectorization. +// Karrenberg, R. and Hack, S. Whole Function Vectorization. // // Other ideas/concepts are from: // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. -- cgit v1.1 From fb55a8fd7c38aa09d9c243d48a8a72d890f36a3d Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 8 Feb 2013 14:50:48 +0000 Subject: ARM cost model: Address computation in vector mem ops not free Adds a function to target transform info to query for the cost of address computation. The cost model analysis pass now also queries this interface. The code in LoopVectorize adds the cost of address computation as part of the memory instruction cost calculation. Only there, we know whether the instruction will be scalarized or not. Increase the penalty for inserting into D registers on Swift. This becomes necessary because we now always assume that address computation has a cost, and three is closer to the real cost on that architecture.
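In rough sketch form (not the exact code, and with abbreviated names), the vectorizer's estimate for a wide memory operation becomes:

  unsigned Cost = TTI.getAddressComputationCost(VecTy);
  Cost += TTI.getMemoryOpCost(Opcode, VecTy, Alignment, AddrSpace);

while the scalarized case instead pays VF * getAddressComputationCost(ScalarTy) on top of the scalar accesses.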
radar://13097204 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174713 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/CostModel.cpp | 5 +++++ lib/Analysis/TargetTransformInfo.cpp | 7 +++++++ lib/CodeGen/BasicTargetTransformInfo.cpp | 5 +++++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 13 +++++++++++-- lib/Transforms/Vectorize/LoopVectorize.cpp | 22 ++++++++++++++-------- 5 files changed, 42 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 1784512..8435e39 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -85,6 +85,11 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { return -1; switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 9fc21fd..72421a0 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -196,6 +196,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return PrevTTI->getNumberOfParts(Tp); } +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { + return PrevTTI->getAddressComputationCost(Tp); +} namespace { @@ -535,6 +538,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { unsigned getNumberOfParts(Type *Tp) const { return 0; } + + unsigned getAddressComputationCost(Type *Tp) const { + return 0; + } }; } // end anonymous namespace diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index ea5e937..e8b5b4f 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -101,6 +101,7 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; + virtual unsigned getAddressComputationCost(Type *Ty) const; /// @} }; @@ -400,3 +401,7 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const { std::pair LT = TLI->getTypeLegalizationCost(Tp); return LT.first; } + +unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { + return 0; +} diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 1f91e0e..f6fa319 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -120,6 +120,8 @@ public: unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; + + unsigned getAddressComputationCost(Type *Val) const; /// @} }; @@ -304,12 +306,13 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index) const { - // Penalize inserting into an D-subregister. + // Penalize inserting into an D-subregister. We end up with a three times + // lower estimated throughput on swift. 
if (ST->isSwift() && Opcode == Instruction::InsertElement && ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) - return 2; + return 3; return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } @@ -326,3 +329,9 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); } + +unsigned ARMTTI::getAddressComputationCost(Type *Ty) const { + // In many cases the address computation is not merged into the instruction + // addressing mode. + return 1; +} diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 91d5659..f12b0bf 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3056,9 +3056,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: - // We mark this instruction as zero-cost because scalar GEPs are usually - // lowered to the intruction addressing mode. At the moment we don't - // generate vector geps. + // We mark this instruction as zero-cost because the cost of GEPs in + // vectorized code depends on whether the corresponding memory instruction + // is scalarized or not. Therefore, we handle GEPs with the memory + // instruction cost. return 0; case Instruction::Br: { return TTI.getCFInstrCost(I->getOpcode()); @@ -3113,9 +3114,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { unsigned AS = SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace(); Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand(); - + // We add the cost of address computation here instead of with the gep + // instruction because only here we know whether the operation is + // scalarized. if (VF == 1) - return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + return TTI.getAddressComputationCost(VectorTy) + + TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); // Scalarized loads/stores. int Stride = Legal->isConsecutivePtr(Ptr); @@ -3135,15 +3139,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VectorTy, i); } - // The cost of the scalar stores. + // The cost of the scalar loads/stores. + Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType()); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, AS); return Cost; } // Wide load/stores. - unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - Alignment, AS); + unsigned Cost = TTI.getAddressComputationCost(VectorTy); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + if (Reverse) Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); -- cgit v1.1 From 0b821eff4c9c8d3b7ac872691bc453337ad3d03a Mon Sep 17 00:00:00 2001 From: David Tweed Date: Fri, 8 Feb 2013 16:35:10 +0000 Subject: The patch to fix some issues in r174543 fixed the lines failing the test, but missed a couple of lines which weren't being explicitly looked at and were printing incorrect results. These values clearly must lie within 32 bits, so the casts are definitely safe. 
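The failure mode is a printf-style width mismatch: format() forwards its arguments to a varargs formatter, so handing an int64_t to a %u or %d conversion reads the wrong bytes on some targets. A minimal sketch (value hypothetical):

  int64_t DataAlignmentFactor = -4;  // hypothetical value
  // mismatched: %d expects an int-sized argument
  // OS << format("  Data alignment factor: %d\n", DataAlignmentFactor);
  // matched after the explicit narrowing, which is safe for these fields:
  OS << format("  Data alignment factor: %d\n", (int32_t)DataAlignmentFactor);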
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174717 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 244ff4c..9da304d31 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -69,9 +69,9 @@ public: << "\n"; OS << format(" Version: %d\n", Version); OS << " Augmentation: \"" << Augmentation << "\"\n"; - OS << format(" Code alignment factor: %u\n", CodeAlignmentFactor); - OS << format(" Data alignment factor: %d\n", DataAlignmentFactor); - OS << format(" Return address column: %d\n", ReturnAddressRegister); + OS << format(" Code alignment factor: %u\n", (uint32_t)CodeAlignmentFactor); + OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor); + OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister); OS << "\n"; } -- cgit v1.1 From 9de31bd8298ae4c94c13bef9b8984745bfe41a60 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 8 Feb 2013 17:43:32 +0000 Subject: Typos. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174723 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f12b0bf..9fb451b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9,10 +9,10 @@ // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops // and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizes uses (will use) the codegen +// in the codegen. However, the vectorizer uses (will use) the codegen // interfaces to generate IR that is likely to result in an optimal binary. // -// The loop vectorizer combines consecutive loop iteration into a single +// The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented // by the SIMD vector width, and not by one. // @@ -637,7 +637,7 @@ struct LoopVectorize : public LoopPass { // Use the cost model. LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); - // Check the function attribues to find out if this function should be + // Check the function attributes to find out if this function should be // optimized for size. Function *F = L->getHeader()->getParent(); Attribute::AttrKind SzAttr = Attribute::OptimizeForSize; @@ -668,7 +668,7 @@ struct LoopVectorize : public LoopPass { F->getParent()->getModuleIdentifier()<<"\n"); DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); - // If we decided that it is *legal* to vectorizer the loop then do it. + // If we decided that it is *legal* to vectorize the loop then do it. InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF); LB.vectorize(&LVL); -- cgit v1.1 From 33daeab1bb8df65273fd9ecbf1a261f96733732e Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 8 Feb 2013 18:00:14 +0000 Subject: [SimplifyLibCalls] Library call simplification doesn't work if the call site isn't using the default calling convention. However, if the transformation is from a call to inline IR, then the calling convention doesn't matter.
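The mechanism is a per-transform opt-out consulted by the driver; in sketch form (mirroring the strlen and abs cases in the patch):

  // In optimizeCall(): the convention check is now conditional.
  if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
    return NULL;

A simplifier that rewrites the call to inline IR can therefore declare the convention irrelevant by overriding ignoreCallingConv() to return true.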
rdar://13157990 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174724 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83c74e7..cccf0a6 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -50,6 +50,10 @@ public: virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; + /// ignoreCallingConv - Returns false if this transformation could possibly + /// change the calling convention. + virtual bool ignoreCallingConv() { return false; } + Value *optimizeCall(CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, IRBuilder<> &B) { @@ -61,7 +65,7 @@ public: Context = &CI->getCalledFunction()->getContext(); // We never change the calling convention. - if (CI->getCallingConv() != llvm::CallingConv::C) + if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) return NULL; return callOptimizer(CI->getCalledFunction(), CI, B); @@ -724,6 +728,7 @@ struct StrNCpyOpt : public LibCallOptimization { }; struct StrLenOpt : public LibCallOptimization { + virtual bool ignoreCallingConv() { return true; } virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || @@ -1260,6 +1265,7 @@ struct FFSOpt : public LibCallOptimization { }; struct AbsOpt : public LibCallOptimization { + virtual bool ignoreCallingConv() { return true; } virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. -- cgit v1.1 From 0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 8 Feb 2013 18:19:17 +0000 Subject: Refine fix to bug 15041. Thanks to help from Nadav and Hal, I have a more reasonable (and even correct!) approach. This specifically penalizes the insertelement and extractelement operations for the performance hit that will occur on PowerPC processors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174725 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 35 +++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f57d764..5e9ad34 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -194,24 +194,23 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { assert(Val->isVectorTy() && "This must be a vector type"); - const unsigned Awful = 1000; - - // Vector element insert/extract with Altivec is very expensive. - // Until VSX is available, avoid vectorizing loops that require - // these operations. - if (Opcode == ISD::EXTRACT_VECTOR_ELT || - Opcode == ISD::INSERT_VECTOR_ELT) - return Awful; - - // We don't vectorize SREM/UREM so well. Constrain the vectorizer - // for those as well. - if (Opcode == ISD::SREM || Opcode == ISD::UREM) - return Awful; - - // VSELECT is not yet implemented, leading to use of insert/extract - // and ISEL, hence not a good idea. 
- if (Opcode == ISD::VSELECT) - return Awful; + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Estimated cost of a load-hit-store delay. This was obtained + // experimentally as a minimum needed to prevent unprofitable + // vectorization for the paq8p benchmark. It may need to be + // raised further if other unprofitable cases remain. + unsigned LHSPenalty = 12; + + // Vector element insert/extract with Altivec is very expensive, + // because they require store and reload with the attendant + // processor stall for load-hit-store. Until VSX is available, + // these need to be estimated as very costly. + if (ISD == ISD::EXTRACT_VECTOR_ELT || + ISD == ISD::INSERT_VECTOR_ELT) + return LHSPenalty + + TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); } -- cgit v1.1 From 8f637adbd383afc2defb5d3f75433b6f2c25d527 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 8 Feb 2013 20:35:15 +0000 Subject: Revert 172027 and 174336. Remove diagnostics about over-aligned stack objects. Aside from the question of whether we report a warning or an error when we can't satisfy a requested stack object alignment, the current implementation of this is not good. We're not providing any source location in the diagnostics and the current warning is not connected to any warning group so you can't control it. We could improve the source location somewhat, but we can do a much better job if this check is implemented in the front-end, so let's do that instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174741 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineFunction.cpp | 48 +++++++++-------------- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 3 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +- 3 files changed, 20 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 1898222..5e04f2d 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -466,32 +466,24 @@ void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { } /// clampStackAlignment - Clamp the alignment if requested and emit a warning. -static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned PrefAlign, - unsigned MinAlign, unsigned StackAlign, - const AllocaInst *Alloca = 0) { - if (!ShouldClamp || PrefAlign <= StackAlign) - return PrefAlign; - if (Alloca && MinAlign > StackAlign) - Alloca->getParent()->getContext().emitWarning(Alloca, - "Requested alignment exceeds the stack alignment!"); - else - assert(MinAlign <= StackAlign && - "Requested alignment exceeds the stack alignment!"); +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); return StackAlign; } -/// CreateStackObjectWithMinAlign - Create a new statically sized stack -/// object, returning a nonnegative identifier to represent it. This function -/// takes a preferred alignment and a minimal alignment. +/// CreateStackObject - Create a new statically sized stack object, returning +/// a nonnegative identifier to represent it. 
/// -int MachineFrameInfo::CreateStackObjectWithMinAlign(uint64_t Size, - unsigned PrefAlignment, unsigned MinAlignment, - bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - unsigned Alignment = clampStackAlignment( - !TFI.isStackRealignable() || !RealignOption, - PrefAlignment, MinAlignment, - TFI.getStackAlignment(), Alloca); + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -507,8 +499,7 @@ int MachineFrameInfo::CreateStackObjectWithMinAlign(uint64_t Size, int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, 0, - TFI.getStackAlignment()); + Alignment, TFI.getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -520,13 +511,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// variable sized object is created, whether or not the index returned is /// actually used. /// -int MachineFrameInfo::CreateVariableSizedObject(unsigned PrefAlignment, - unsigned MinAlignment, const AllocaInst *Alloca) { +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - unsigned Alignment = clampStackAlignment( - !TFI.isStackRealignable() || !RealignOption, - PrefAlignment, MinAlignment, - TFI.getStackAlignment(), Alloca); + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -547,7 +535,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, 0, TFI.getStackAlignment()); + Align, TFI.getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 229c50b..b46edad 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -95,8 +95,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { (TySize >= 8 && isa(Ty) && cast(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObjectWithMinAlign(TySize, Align, - AI->getAlignment(), false, + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP, AI); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e0d6643..405e1e8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3259,8 +3259,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Inform the Frame Information that we have just allocated a 
variable-sized // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, - I.getAlignment(), &I); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { -- cgit v1.1 From 0cf5d396c14c71dd4fa1d102c2b3d178b1191436 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 8 Feb 2013 21:13:39 +0000 Subject: BBVectorize: Use TTI->getAddressComputationCost This is a follow-up to the cost-model change in r174713 which splits the cost of a memory operation between the address computation and the actual memory access. In r174713, this cost is always added to the memory operation cost, and so BBVectorize will do the same. Currently, this new cost function is used only by ARM, and I don't have any ARM test cases for BBVectorize. Assistance in generating some good ARM test cases for BBVectorize would be greatly appreciated! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 9bf09ff..ec10b42 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -972,6 +972,11 @@ namespace { unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType, BottomAlignment, IAddressSpace); + + ICost += TTI->getAddressComputationCost(aTypeI); + JCost += TTI->getAddressComputationCost(aTypeJ); + VCost += TTI->getAddressComputationCost(VType); + if (VCost > ICost + JCost) return false; -- cgit v1.1 From 089a5f8a8c5e24f996dd41419de2c7bc7b42ea29 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 8 Feb 2013 21:35:47 +0000 Subject: DAGCombiner: Constant folding around pre-increment loads/stores Previously, even when a pre-increment load or store was generated, we often needed to keep a copy of the original base register for use with other offsets. If all of these offsets are constants (including the offset which was combined into the addressing mode), then this is clearly unnecessary. This change adjusts these other offsets to use the new incremented address. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 89 ++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d694bc7..472919c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6917,6 +6917,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) return false; + + // Backends without true r+i pre-indexed forms may need to pass a + // constant base with a variable offset so that constant coercion + // will work with the patterns in canonical form. + bool Swapped = false; + if (isa(BasePtr)) { + std::swap(BasePtr, Offset); + Swapped = true; + } + // Don't create a indexed load / store with zero offset. if (isa(Offset) && cast(Offset)->isNullValue()) @@ -6942,6 +6952,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // If the offset is a constant, there may be other adds of constants that + // can be folded with this one. 
We should do this to avoid having to keep + // a copy of the original base pointer. + SmallVector OtherUses; + if (isa(Offset)) + for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), + E = BasePtr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == Ptr.getNode()) + continue; + + if (Use->isPredecessorOf(N)) + continue; + + if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); + if (Op1.getNode() == BasePtr.getNode()) + std::swap(Op0, Op1); + assert(Op0.getNode() == BasePtr.getNode() && + "Use of ADD/SUB but not an operand"); + + if (!isa(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use); + } + + if (Swapped) + std::swap(BasePtr, Offset); + // Now check for #3 and #4. bool RealUse = false; @@ -6991,6 +7043,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); + if (Swapped) + std::swap(BasePtr, Offset); + + // Replace other uses of BasePtr that can be updated to use Ptr + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { + unsigned OffsetIdx = 1; + if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) + OffsetIdx = 0; + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == + BasePtr.getNode() && "Expected BasePtr operand"); + + APInt OV = + cast(Offset)->getAPIntValue(); + if (AM == ISD::PRE_DEC) + OV = -OV; + + ConstantSDNode *CN = + cast(OtherUses[i]->getOperand(OffsetIdx)); + APInt CNV = CN->getAPIntValue(); + if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) + CNV += OV; + else + CNV -= OV; + + SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); + if (OffsetIdx == 0) + std::swap(NewOp1, NewOp2); + + SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + OtherUses[i]->getDebugLoc(), + OtherUses[i]->getValueType(0), NewOp1, NewOp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); + removeFromWorkList(OtherUses[i]); + DAG.DeleteNode(OtherUses[i]); + } + // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); removeFromWorkList(Ptr.getNode()); -- cgit v1.1 From b2d1275188c997e279293afc031a88e03871f9e0 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 8 Feb 2013 21:42:56 +0000 Subject: Add the 16 bit version of addiu. To the assembler, the 16 and 32 bit are the same so we put in the comment field an indicator when we think we are emitting the 16 bit version. For the direct object emitter, the difference is important as well as for other passes which need an accurate count of program size. There will be other similar putbacks to this for various instructions. 
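For example, the same assembly text can now come from either encoding, and only the comment distinguishes them (operands hypothetical):

  addiu $16, 4 # 16 bit inst
  addiu $16, 4

The assembler treats both lines identically; the marker records that the compiler selected the 16-bit encoding, which is what matters for an accurate program size count and for direct object emission.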
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174747 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 17 +++++++++++++++++ lib/Target/Mips/MipsInstrInfo.td | 4 ++++ 2 files changed, 21 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 135df75..49048db 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -32,6 +32,18 @@ def mem16_ea : Operand { } // +// RI instruction format +// + + +class F2RI16_ins _op, string asmstr, + InstrItinClass itin>: + FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin> { + let Constraints = "$rx_ = $rx"; +} + +// // Compare a register and immediate and place result in CC // Implicit use of T8 // @@ -416,6 +428,10 @@ class MayStore { // def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>; +def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>, + ArithLogic16Defs<0> { + let AddedComplexity = 5; +} def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, ArithLogic16Defs<0>; @@ -1055,6 +1071,7 @@ class ArithLogicI16_pat : Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm), (I CPU16Regs:$in, imm_type:$imm)>; +def: ArithLogicI16_pat; def: ArithLogicI16_pat; def: ArithLogicI16_pat; def: ArithLogicI16_pat; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index c85b547..052e855 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -301,6 +301,10 @@ def HI16 : SDNodeXForm(N->getSExtValue()); }]>; + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; // Node immediate fits as 15-bit sign extended on target immediate. -- cgit v1.1 From 58446916b71c4ff79962081ea7c4df078c388b0e Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 8 Feb 2013 21:48:29 +0000 Subject: Revert "Add LLVMContext::emitWarning methods and use them. " This reverts r171041. This was a nice idea that didn't work out well. Clang warnings need to be associated with warning groups so that they can be selectively disabled, promoted to errors, etc. This simplistic patch didn't allow for that. Enhancing it to provide some way for the backend to specify a front-end warning type seems like overkill for the few uses of this, at least for now. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174748 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ProfileDataLoaderPass.cpp | 4 +-- lib/Analysis/ProfileInfoLoaderPass.cpp | 17 +++++----- lib/CodeGen/IntrinsicLowering.cpp | 36 +++++++++------------- lib/IR/LLVMContext.cpp | 29 +---------------- lib/Transforms/Instrumentation/EdgeProfiling.cpp | 5 ++- .../Instrumentation/OptimalEdgeProfiling.cpp | 5 ++- lib/Transforms/Instrumentation/PathProfiling.cpp | 4 +-- 7 files changed, 31 insertions(+), 69 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp index 51b7f1d..2ee0093 100644 --- a/lib/Analysis/ProfileDataLoaderPass.cpp +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -177,8 +177,8 @@ bool ProfileMetadataLoaderPass::runOnModule(Module &M) { unsigned ReadCount = matchEdges(M, PB, Counters); if (ReadCount != Counters.size()) { - M.getContext().emitWarning("profile information is inconsistent " - "with the current program"); + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } NumEdgesRead = ReadCount; diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 094c107..346f8d6 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" @@ -171,8 +170,8 @@ bool LoaderPass::runOnModule(Module &M) { } } if (ReadCount != Counters.size()) { - M.getContext().emitWarning("profile information is inconsistent " - "with the current program"); + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } NumEdgesRead = ReadCount; } @@ -219,8 +218,8 @@ bool LoaderPass::runOnModule(Module &M) { } } if (ReadCount != Counters.size()) { - M.getContext().emitWarning("profile information is inconsistent " - "with the current program"); + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } NumEdgesRead = ReadCount; } @@ -240,8 +239,8 @@ bool LoaderPass::runOnModule(Module &M) { BlockInformation[F][BB] = (double)Counters[ReadCount++]; } if (ReadCount != Counters.size()) { - M.getContext().emitWarning("profile information is inconsistent " - "with the current program"); + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } } @@ -259,8 +258,8 @@ bool LoaderPass::runOnModule(Module &M) { FunctionInformation[F] = (double)Counters[ReadCount++]; } if (ReadCount != Counters.size()) { - M.getContext().emitWarning("profile information is inconsistent " - "with the current program"); + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 16e7968..07f0ccf 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -413,30 +413,22 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::stacksave: + case Intrinsic::stackrestore: { if (!Warned) - Context.emitWarning("this target does not support the " - "llvm.stacksave intrinsic"); - Warned = true; - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - - case Intrinsic::stackrestore: - if (!Warned) - 
Context.emitWarning("this target does not support the " - "llvm.stackrestore intrinsic"); + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; + } case Intrinsic::returnaddress: - Context.emitWarning("this target does not support the " - "llvm.returnaddress intrinsic"); - CI->replaceAllUsesWith(ConstantPointerNull::get( - cast(CI->getType()))); - break; - case Intrinsic::frameaddress: - Context.emitWarning("this target does not support the " - "llvm.frameaddress intrinsic"); + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; CI->replaceAllUsesWith(ConstantPointerNull::get( cast(CI->getType()))); break; @@ -446,12 +438,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::pcmarker: break; // Simply strip out pcmarker on unsupported architectures - case Intrinsic::readcyclecounter: - Context.emitWarning("this target does not support the " - "llvm.readcyclecounter intrinsic; " - "it is being lowered to a constant 0"); + case Intrinsic::readcyclecounter: { + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; + } case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 282779c..774c591 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -95,30 +95,16 @@ void LLVMContext::emitError(const Twine &ErrorStr) { emitError(0U, ErrorStr); } -void LLVMContext::emitWarning(const Twine &ErrorStr) { - emitWarning(0U, ErrorStr); -} - -static unsigned getSrcLocation(const Instruction *I) { +void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) { unsigned LocCookie = 0; if (const MDNode *SrcLoc = I->getMetadata("srcloc")) { if (SrcLoc->getNumOperands() != 0) if (const ConstantInt *CI = dyn_cast(SrcLoc->getOperand(0))) LocCookie = CI->getZExtValue(); } - return LocCookie; -} - -void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) { - unsigned LocCookie = getSrcLocation(I); return emitError(LocCookie, ErrorStr); } -void LLVMContext::emitWarning(const Instruction *I, const Twine &ErrorStr) { - unsigned LocCookie = getSrcLocation(I); - return emitWarning(LocCookie, ErrorStr); -} - void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { // If there is no error handler installed, just print the error and exit. if (pImpl->DiagHandler == 0) { @@ -132,19 +118,6 @@ void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { pImpl->DiagHandler(Diag, pImpl->DiagContext, LocCookie); } -void LLVMContext::emitWarning(unsigned LocCookie, const Twine &ErrorStr) { - // If there is no handler installed, just print the warning. - if (pImpl->DiagHandler == 0) { - errs() << "warning: " << ErrorStr << "\n"; - return; - } - - // If we do have a handler, we can report the warning. 
- SMDiagnostic Diag("", SourceMgr::DK_Warning, ErrorStr.str()); - - pImpl->DiagHandler(Diag, pImpl->DiagContext, LocCookie); -} - //===----------------------------------------------------------------------===// // Metadata Kind Uniquing //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 0b18b4c..a2459fb 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -21,7 +21,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "ProfilingUtils.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" @@ -55,8 +54,8 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } bool EdgeProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - M.getContext().emitWarning("cannot insert edge profiling into a module" - " with no main function"); + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index c5a1fe9..b45aef6 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -76,8 +75,8 @@ inline static void printEdgeCounter(ProfileInfo::Edge e, bool OptimalEdgeProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - M.getContext().emitWarning("cannot insert edge profiling into a module" - " with no main function"); + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 358bbeb..7de7326 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -1345,8 +1345,8 @@ bool PathProfiler::runOnModule(Module &M) { Main = M.getFunction("MAIN__"); if (!Main) { - Context->emitWarning("cannot insert edge profiling into a module" - " with no main function"); + errs() << "WARNING: cannot insert path profiling into a module" + << " with no main function!\n"; return false; } -- cgit v1.1 From d3da6d5dda00761d383a1c339f54da470643c663 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 8 Feb 2013 22:01:47 +0000 Subject: Minor cleanup. 
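The "minor cleanup" below swaps AI->getType()->getElementType() for the equivalent but more direct AI->getAllocatedType() in the alloca printer. A minimal sketch of the distinction (header paths assume the llvm/IR layout used throughout this series; the function scaffolding is illustrative):

    #include <cassert>
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("example", Ctx);
      Function *F = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
          Function::ExternalLinkage, "f", &M);
      BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);

      // 'alloca i32' yields a value of pointer type i32* ...
      AllocaInst *AI = new AllocaInst(Type::getInt32Ty(Ctx), "buf", BB);
      assert(AI->getType() == Type::getInt32PtrTy(Ctx));
      // ... while what the printer shows after "alloca" is the allocated type:
      assert(AI->getAllocatedType() == Type::getInt32Ty(Ctx));
      return 0;
    }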
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174756 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 11e06c9..efa5978 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1923,7 +1923,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } else if (const AllocaInst *AI = dyn_cast(&I)) { Out << ' '; - TypePrinter.print(AI->getType()->getElementType(), Out); + TypePrinter.print(AI->getAllocatedType(), Out); if (!AI->getArraySize() || AI->isArrayAllocation()) { Out << ", "; writeOperand(AI->getArraySize(), true); -- cgit v1.1 From 13cf6cb57ad6e0bcd66c0ff11b4c4c568ee2f164 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 8 Feb 2013 22:24:35 +0000 Subject: R600: fix PHI value adding in the structurizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we sometimes produce invalid code. Patch by: Christian König Signed-off-by: Christian König Tested-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174760 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 146 +++++++++++++++++-------------- 1 file changed, 81 insertions(+), 65 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 8295efd..7f3a35e 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -41,6 +41,7 @@ typedef DenseMap BBPhiMap; typedef DenseMap BBPredicates; typedef DenseMap PredMap; typedef DenseMap VisitedMap; +typedef DenseMap BB2BBVecMap; // The name for newly created blocks. 
@@ -109,6 +110,7 @@ class AMDGPUStructurizeCFG : public RegionPass { VisitedMap Visited; PredMap Predicates; BBPhiMap DeletedPhis; + BB2BBVecMap AddedPhis; BBVector FlowsInserted; BasicBlock *LoopStart; @@ -126,16 +128,18 @@ class AMDGPUStructurizeCFG : public RegionPass { void collectInfos(); + void delPhiValues(BasicBlock *From, BasicBlock *To); + + void addPhiValues(BasicBlock *From, BasicBlock *To); + + void setPhiValues(); + bool dominatesPredicates(BasicBlock *A, BasicBlock *B); void killTerminator(BasicBlock *BB); RegionNode *skipChained(RegionNode *Node); - void delPhiValues(BasicBlock *From, BasicBlock *To); - - void addPhiValues(BasicBlock *From, BasicBlock *To); - BasicBlock *getNextFlow(BasicBlock *Prev); bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node); @@ -309,6 +313,76 @@ void AMDGPUStructurizeCFG::collectInfos() { } } +/// \brief Remove all PHI values coming from "From" into "To" and remember +/// them in DeletedPhis +void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { + PhiMap &Map = DeletedPhis[To]; + for (BasicBlock::iterator I = To->begin(), E = To->end(); + I != E && isa(*I);) { + + PHINode &Phi = cast(*I++); + while (Phi.getBasicBlockIndex(From) != -1) { + Value *Deleted = Phi.removeIncomingValue(From, false); + Map[&Phi].push_back(std::make_pair(From, Deleted)); + } + } +} + +/// \brief Add a dummy PHI value as soon as we knew the new predecessor +void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { + for (BasicBlock::iterator I = To->begin(), E = To->end(); + I != E && isa(*I);) { + + PHINode &Phi = cast(*I++); + Value *Undef = UndefValue::get(Phi.getType()); + Phi.addIncoming(Undef, From); + } + AddedPhis[To].push_back(From); +} + +/// \brief Add the real PHI value as soon as everything is set up +void AMDGPUStructurizeCFG::setPhiValues() { + + SSAUpdater Updater; + for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end(); + AI != AE; ++AI) { + + BasicBlock *To = AI->first; + BBVector &From = AI->second; + + if (!DeletedPhis.count(To)) + continue; + + PhiMap &Map = DeletedPhis[To]; + for (PhiMap::iterator PI = Map.begin(), PE = Map.end(); + PI != PE; ++PI) { + + PHINode *Phi = PI->first; + Value *Undef = UndefValue::get(Phi->getType()); + Updater.Initialize(Phi->getType(), ""); + Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); + Updater.AddAvailableValue(To, Undef); + + for (BBValueVector::iterator VI = PI->second.begin(), + VE = PI->second.end(); VI != VE; ++VI) { + + Updater.AddAvailableValue(VI->first, VI->second); + } + + for (BBVector::iterator FI = From.begin(), FE = From.end(); + FI != FE; ++FI) { + + int Idx = Phi->getBasicBlockIndex(*FI); + assert(Idx != -1); + Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI)); + } + } + + DeletedPhis.erase(To); + } + assert(DeletedPhis.empty()); +} + /// \brief Does A dominate all the predicates of B ? 
bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) { BBPredicates &Preds = Predicates[B]; @@ -406,57 +480,6 @@ RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { return ParentRegion->getNode(wireFlowBlock(BB, Next)); } -/// \brief Remove all PHI values coming from "From" into "To" and remember -/// them in DeletedPhis -void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { - PhiMap &Map = DeletedPhis[To]; - for (BasicBlock::iterator I = To->begin(), E = To->end(); - I != E && isa(*I);) { - - PHINode &Phi = cast(*I++); - while (Phi.getBasicBlockIndex(From) != -1) { - Value *Deleted = Phi.removeIncomingValue(From, false); - Map[&Phi].push_back(std::make_pair(From, Deleted)); - } - } -} - -/// \brief Add the PHI values back once we knew the new predecessor -void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { - if (!DeletedPhis.count(To)) - return; - - PhiMap &Map = DeletedPhis[To]; - SSAUpdater Updater; - - for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { - - PHINode *Phi = I->first; - Updater.Initialize(Phi->getType(), ""); - BasicBlock *Fallback = To; - bool HaveFallback = false; - - for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end(); - VI != VE; ++VI) { - - Updater.AddAvailableValue(VI->first, VI->second); - BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first); - if (Dom == VI->first) - HaveFallback = true; - else if (Dom != Fallback) - HaveFallback = false; - Fallback = Dom; - } - if (!HaveFallback) { - Value *Undef = UndefValue::get(Phi->getType()); - Updater.AddAvailableValue(Fallback, Undef); - } - - Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From); - } - DeletedPhis.erase(To); -} - /// \brief Create a new flow node and update dominator tree and region info BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { LLVMContext &Context = Func->getContext(); @@ -554,6 +577,7 @@ BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, /// branches only have undefined conditions. void AMDGPUStructurizeCFG::createFlow() { DeletedPhis.clear(); + AddedPhis.clear(); BasicBlock *Prev = Order.pop_back_val()->getEntry(); assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); @@ -601,18 +625,8 @@ void AMDGPUStructurizeCFG::createFlow() { if (DT->dominates(ParentRegion->getEntry(), Exit)) DT->changeImmediateDominator(Exit, Prev); - if (LoopStart && LoopEnd) { - BBVector::iterator FI = std::find(FlowsInserted.begin(), - FlowsInserted.end(), - LoopStart); - for (; *FI != LoopEnd; ++FI) { - addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0)); - } - } - assert(Order.empty()); assert(Visited.empty()); - assert(DeletedPhis.empty()); } /// \brief Insert the missing branch conditions @@ -697,12 +711,14 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { collectInfos(); createFlow(); insertConditions(); + setPhiValues(); rebuildSSA(); Order.clear(); Visited.clear(); Predicates.clear(); DeletedPhis.clear(); + AddedPhis.clear(); FlowsInserted.clear(); return true; -- cgit v1.1 From 27f5d0618188a4a51cc222a0d71c5aa845f31189 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 8 Feb 2013 22:24:37 +0000 Subject: R600: fix loop analyses in the structurizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Christian König Intersecting loop handling was wrong. 
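Both structurizer fixes in this series lean on SSAUpdater: delete the PHI operands first, then, once every new predecessor is known, re-derive each incoming value from a set of "available values". The pattern in isolation (an illustrative scaffold on a hand-built diamond CFG, not code from the pass itself):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("example", Ctx);
      Function *F = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), false),
          Function::ExternalLinkage, "f", &M);
      BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
      BasicBlock *Left  = BasicBlock::Create(Ctx, "left",  F);
      BasicBlock *Right = BasicBlock::Create(Ctx, "right", F);
      BasicBlock *Join  = BasicBlock::Create(Ctx, "join",  F);
      BranchInst::Create(Left, Right, ConstantInt::getTrue(Ctx), Entry);
      BranchInst::Create(Join, Left);
      BranchInst::Create(Join, Right);
      ReturnInst::Create(Ctx, Join);

      Type *I32 = Type::getInt32Ty(Ctx);
      SSAUpdater Updater;
      Updater.Initialize(I32, "val");
      // One available value per predecessor, the way setPhiValues() seeds it:
      Updater.AddAvailableValue(Left,  ConstantInt::get(I32, 1));
      Updater.AddAvailableValue(Right, ConstantInt::get(I32, 2));
      // Querying the join block materializes the needed PHI:
      Value *V = Updater.GetValueInMiddleOfBlock(Join);
      (void)V; // %val = phi i32 [ 1, %left ], [ 2, %right ]
      return 0;
    }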
Signed-off-by: Christian König Tested-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174761 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 296 +++++++++++++++++++------------ 1 file changed, 183 insertions(+), 113 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 7f3a35e..e1c7790 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -30,12 +30,14 @@ namespace { // Definition of the complex types used in this pass. typedef std::pair BBValuePair; -typedef ArrayRef BBVecRef; typedef SmallVector RNVector; typedef SmallVector BBVector; +typedef SmallVector BranchVector; typedef SmallVector BBValueVector; +typedef SmallPtrSet BBSet; + typedef DenseMap PhiMap; typedef DenseMap BBPhiMap; typedef DenseMap BBPredicates; @@ -111,23 +113,27 @@ class AMDGPUStructurizeCFG : public RegionPass { PredMap Predicates; BBPhiMap DeletedPhis; BB2BBVecMap AddedPhis; - BBVector FlowsInserted; + BranchVector Conditions; BasicBlock *LoopStart; BasicBlock *LoopEnd; + BBSet LoopTargets; BBPredicates LoopPred; void orderNodes(); - void buildPredicate(BranchInst *Term, unsigned Idx, - BBPredicates &Pred, bool Invert); + Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); + + bool analyzeLoopStart(BasicBlock *From, BasicBlock *To, Value *Condition); - void analyzeBlock(BasicBlock *BB); + void analyzeNode(RegionNode *N); - void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx); + void analyzeLoopEnd(RegionNode *N); void collectInfos(); + void insertConditions(); + void delPhiValues(BasicBlock *From, BasicBlock *To); void addPhiValues(BasicBlock *From, BasicBlock *To); @@ -148,8 +154,6 @@ class AMDGPUStructurizeCFG : public RegionPass { void createFlow(); - void insertConditions(); - void rebuildSSA(); public: @@ -202,114 +206,209 @@ void AMDGPUStructurizeCFG::orderNodes() { } } -/// \brief Build blocks and loop predicates -void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx, - BBPredicates &Pred, bool Invert) { - Value *True = Invert ? BoolFalse : BoolTrue; - Value *False = Invert ? BoolTrue : BoolFalse; +/// \brief Build the condition for one edge +Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, + bool Invert) { + Value *Cond = Invert ? BoolFalse : BoolTrue; + if (Term->isConditional()) { + Cond = Term->getCondition(); - RegionInfo *RI = ParentRegion->getRegionInfo(); - BasicBlock *BB = Term->getParent(); + if (Idx != Invert) + Cond = BinaryOperator::CreateNot(Cond, "", Term); + } + return Cond; +} - // Handle the case where multiple regions start at the same block - Region *R = BB != ParentRegion->getEntry() ? - RI->getRegionFor(BB) : ParentRegion; +/// \brief Analyze the start of a loop and insert predicates as necessary +bool AMDGPUStructurizeCFG::analyzeLoopStart(BasicBlock *From, BasicBlock *To, + Value *Condition) { + LoopPred[From] = Condition; + LoopTargets.insert(To); + if (!LoopStart) { + LoopStart = To; + return true; + + } else if (LoopStart == To) + return true; + + // We need to handle the case of intersecting loops, e. g. 
+ // + // /----<----- + // | | + // -> A -> B -> C -> D + // | | + // -----<----/ - if (R == ParentRegion) { - // It's a top level block in our region - Value *Cond = True; - if (Term->isConditional()) { - BasicBlock *Other = Term->getSuccessor(!Idx); + RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); - if (Visited.count(Other)) { - if (!Pred.count(Other)) - Pred[Other] = False; + for (;OI != OE; ++OI) + if ((*OI)->getEntry() == LoopStart) + break; - if (!Pred.count(BB)) - Pred[BB] = True; - return; - } - Cond = Term->getCondition(); + for (;OI != OE && (*OI)->getEntry() != To; ++OI) { + BBPredicates &Pred = Predicates[(*OI)->getEntry()]; + if (!Pred.count(From)) + Pred[From] = Condition; + } + return false; +} - if (Idx != Invert) - Cond = BinaryOperator::CreateNot(Cond, "", Term); - } +/// \brief Analyze the predecessors of each block and build up predicates +void AMDGPUStructurizeCFG::analyzeNode(RegionNode *N) { + RegionInfo *RI = ParentRegion->getRegionInfo(); + BasicBlock *BB = N->getEntry(); + BBPredicates &Pred = Predicates[BB]; - Pred[BB] = Cond; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { - } else if (ParentRegion->contains(R)) { - // It's a block in a sub region - while(R->getParent() != ParentRegion) - R = R->getParent(); + if (!ParentRegion->contains(*PI)) { + // It's a branch from outside into our region entry + Pred[*PI] = BoolTrue; + continue; + } - Pred[R->getEntry()] = True; + Region *R = RI->getRegionFor(*PI); + if (R == ParentRegion) { - } else { - // It's a branch from outside into our parent region - Pred[BB] = True; - } -} + // It's a top level block in our region + BranchInst *Term = cast((*PI)->getTerminator()); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); + if (Succ != BB) + continue; -/// \brief Analyze the successors of each block and build up predicates -void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - BBPredicates &Pred = Predicates[BB]; + if (Visited.count(*PI)) { + // Normal forward edge + if (Term->isConditional()) { + // Try to treat it like an ELSE block + BasicBlock *Other = Term->getSuccessor(!i); + if (Visited.count(Other) && !LoopTargets.count(Other) && + !Pred.count(Other) && !Pred.count(*PI)) { + + Pred[Other] = BoolFalse; + Pred[*PI] = BoolTrue; + continue; + } + } + + } else { + // Back edge + if (analyzeLoopStart(*PI, BB, buildCondition(Term, i, true))) + continue; + } + Pred[*PI] = buildCondition(Term, i, false); + } - for (; PI != PE; ++PI) { - BranchInst *Term = cast((*PI)->getTerminator()); + } else { - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - if (Succ != BB) + // It's an exit from a sub region + while(R->getParent() != ParentRegion) + R = R->getParent(); + + // Edge from inside a subregion to its entry, ignore it + if (R == N) continue; - buildPredicate(Term, i, Pred, false); + + BasicBlock *Entry = R->getEntry(); + if (!Visited.count(Entry)) + if (analyzeLoopStart(Entry, BB, BoolFalse)) + continue; + + Pred[Entry] = BoolTrue; } } } -/// \brief Analyze the conditions leading to loop to a previous block -void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) { - BranchInst *Term = cast(BB->getTerminator()); +/// \brief Determine the end of the loop +void AMDGPUStructurizeCFG::analyzeLoopEnd(RegionNode *N) { - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; 
++i) { - BasicBlock *Succ = Term->getSuccessor(i); + if (N->isSubRegion()) { + // Test for exit as back edge + BasicBlock *Exit = N->getNodeAs()->getExit(); + if (Visited.count(Exit)) + LoopEnd = N->getEntry(); - // Ignore it if it's not a back edge - if (!Visited.count(Succ)) - continue; + } else { + // Test for successors as back edge + BasicBlock *BB = N->getNodeAs(); + BranchInst *Term = cast(BB->getTerminator()); - buildPredicate(Term, i, LoopPred, true); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); - LoopEnd = BB; - if (Visited[Succ] < LoopIdx) { - LoopIdx = Visited[Succ]; - LoopStart = Succ; + if (Visited.count(Succ)) + LoopEnd = BB; } } } /// \brief Collect various loop and predicate infos void AMDGPUStructurizeCFG::collectInfos() { - unsigned Number = 0, LoopIdx = ~0; + unsigned Number = 0; // Reset predicate Predicates.clear(); // and loop infos LoopStart = LoopEnd = 0; + LoopTargets.clear(); LoopPred.clear(); - RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); - for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) { + // Reset the visited nodes + Visited.clear(); + + for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); + OI != OE; ++OI) { // Analyze all the conditions leading to a node - analyzeBlock((*OI)->getEntry()); + analyzeNode(*OI); - if ((*OI)->isSubRegion()) - continue; + // Remember that we've seen this node + Visited[(*OI)->getEntry()] = ++Number; - // Find the first/last loop nodes and loop predicates - analyzeLoop((*OI)->getNodeAs(), LoopIdx); + // Find the last back edge + analyzeLoopEnd(*OI); + } + + // Both or neither must be set + assert(!LoopStart == !LoopEnd); +} + +/// \brief Insert the missing branch conditions +void AMDGPUStructurizeCFG::insertConditions() { + SSAUpdater PhiInserter; + + for (BranchVector::iterator I = Conditions.begin(), + E = Conditions.end(); I != E; ++I) { + + BranchInst *Term = *I; + BasicBlock *Parent = Term->getParent(); + + assert(Term->isConditional()); + + PhiInserter.Initialize(Boolean, ""); + if (Parent == LoopEnd) { + PhiInserter.AddAvailableValue(LoopStart, BoolTrue); + } else { + PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); + PhiInserter.AddAvailableValue(Parent, BoolFalse); + } + + bool ParentHasValue = false; + BasicBlock *Succ = Term->getSuccessor(0); + BBPredicates &Preds = (Parent == LoopEnd) ?
LoopPred : Predicates[Succ]; + for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + + PhiInserter.AddAvailableValue(PI->first, PI->second); + ParentHasValue |= PI->first == Parent; + } + + if (ParentHasValue) + Term->setCondition(PhiInserter.GetValueAtEndOfBlock(Parent)); + else + Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); } } @@ -474,7 +573,6 @@ RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { assert(I != E); killTerminator(BB); - FlowsInserted.push_back(BB); Visited.erase(Succ); Order.erase(I); return ParentRegion->getNode(wireFlowBlock(BB, Next)); @@ -489,7 +587,6 @@ BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { Func, Insert); DT->addNewBlock(Flow, Prev); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); - FlowsInserted.push_back(Flow); return Flow; } @@ -517,10 +614,8 @@ BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, RegionNode *Node) { BasicBlock *Entry = Node->getEntry(); - if (LoopStart == Entry) { + if (LoopStart == Entry) LoopStart = Prev; - LoopPred[Prev] = BoolTrue; - } // Wire it up temporary, skipChained may recurse into us BranchInst::Create(Entry, Prev); @@ -533,7 +628,7 @@ BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, if (!isPredictableTrue(Prev, Entry)) { // Let Prev point to entry and next block Prev->getTerminator()->eraseFromParent(); - BranchInst::Create(Entry, Next, BoolUndef, Prev); + Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Prev)); } else { DT->changeImmediateDominator(Next, Entry); } @@ -591,7 +686,6 @@ void AMDGPUStructurizeCFG::createFlow() { ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); Predicates[Split] = Predicates[Prev]; Order.push_back(ParentRegion->getBBNode(Split)); - LoopPred[Prev] = BoolTrue; } else if (LoopStart == Order.back()->getEntry()) { // Loop starts behind entry, split entry so that we can jump to it @@ -603,8 +697,6 @@ void AMDGPUStructurizeCFG::createFlow() { } killTerminator(Prev); - FlowsInserted.clear(); - FlowsInserted.push_back(Prev); while (!Order.empty()) { RegionNode *Node = Order.pop_back_val(); @@ -614,7 +706,8 @@ void AMDGPUStructurizeCFG::createFlow() { // Create an extra loop end node LoopEnd = Prev; Prev = getNextFlow(LoopEnd); - BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd); + Conditions.push_back(BranchInst::Create(Prev, LoopStart, + BoolUndef, LoopEnd)); addPhiValues(LoopEnd, LoopStart); } } @@ -629,32 +722,6 @@ void AMDGPUStructurizeCFG::createFlow() { assert(Visited.empty()); } -/// \brief Insert the missing branch conditions -void AMDGPUStructurizeCFG::insertConditions() { - SSAUpdater PhiInserter; - - for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end(); - FI != FE; ++FI) { - - BranchInst *Term = cast((*FI)->getTerminator()); - if (Term->isUnconditional()) - continue; - - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); - - BasicBlock *Succ = Term->getSuccessor(0); - BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ]; - for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { - - PhiInserter.AddAvailableValue(PI->first, PI->second); - } - - Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI)); - } -} - /// Handle a rare case where the disintegrated nodes instructions /// no longer dominate all their uses. 
Not sure if this is really nessasary void AMDGPUStructurizeCFG::rebuildSSA() { @@ -714,12 +781,15 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { setPhiValues(); rebuildSSA(); + // Cleanup Order.clear(); Visited.clear(); Predicates.clear(); DeletedPhis.clear(); AddedPhis.clear(); - FlowsInserted.clear(); + Conditions.clear(); + LoopTargets.clear(); + LoopPred.clear(); return true; } -- cgit v1.1 From f4e471a49ed6d8e957aeb62972b8dae5304b4440 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 8 Feb 2013 22:24:38 +0000 Subject: R600: rework flow creation in the structurizer v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a couple of bugs and incorrect assumptions, in total four more piglit tests now pass. v2: fix small bug in the dominator updating Patch by: Christian König Signed-off-by: Christian König git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 372 ++++++++++++++++--------------- 1 file changed, 195 insertions(+), 177 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index e1c7790..c4c9762 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -42,7 +42,6 @@ typedef DenseMap PhiMap; typedef DenseMap BBPhiMap; typedef DenseMap BBPredicates; typedef DenseMap PredMap; -typedef DenseMap VisitedMap; typedef DenseMap BB2BBVecMap; // The name for newly created blocks. @@ -109,7 +108,7 @@ class AMDGPUStructurizeCFG : public RegionPass { DominatorTree *DT; RNVector Order; - VisitedMap Visited; + BBSet Visited; PredMap Predicates; BBPhiMap DeletedPhis; BB2BBVecMap AddedPhis; @@ -140,17 +139,24 @@ class AMDGPUStructurizeCFG : public RegionPass { void setPhiValues(); - bool dominatesPredicates(BasicBlock *A, BasicBlock *B); - void killTerminator(BasicBlock *BB); - RegionNode *skipChained(RegionNode *Node); + void changeExit(RegionNode *Node, BasicBlock *NewExit, + bool IncludeDominator); + + BasicBlock *getNextFlow(BasicBlock *Dominator); + + BasicBlock *needPrefix(RegionNode *&Prev, RegionNode *Node); - BasicBlock *getNextFlow(BasicBlock *Prev); + BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); - bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node); + RegionNode *getNextPrev(BasicBlock *Next); - BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node); + bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); + + bool isPredictableTrue(RegionNode *Who, RegionNode *Where); + + RegionNode *wireFlow(RegionNode *&Prev, bool ExitUseAllowed); void createFlow(); @@ -345,7 +351,6 @@ void AMDGPUStructurizeCFG::analyzeLoopEnd(RegionNode *N) { /// \brief Collect various loop and predicate infos void AMDGPUStructurizeCFG::collectInfos() { - unsigned Number = 0; // Reset predicate Predicates.clear(); @@ -365,7 +370,7 @@ void AMDGPUStructurizeCFG::collectInfos() { analyzeNode(*OI); // Remember that we've seen this node - Visited[(*OI)->getEntry()] = ++Number; + Visited.insert((*OI)->getEntry()); // Find the last back edge analyzeLoopEnd(*OI); @@ -482,19 +487,7 @@ void AMDGPUStructurizeCFG::setPhiValues() { assert(DeletedPhis.empty()); } -/// \brief Does A dominate all the predicates of B ? 
-bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) { - BBPredicates &Preds = Predicates[B]; - for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { - - if (!DT->dominates(A, PI->first)) - return false; - } - return true; -} - -/// \brief Remove phi values from all successors and the remove the terminator. +/// \brief Remove phi values from all successors and then remove the terminator. void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { TerminatorInst *Term = BB->getTerminator(); if (!Term) @@ -509,92 +502,153 @@ void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { Term->eraseFromParent(); } -/// First: Skip forward to the first region node that either isn't a subregion or not -/// dominating it's exit, remove all the skipped nodes from the node order. -/// -/// Second: Handle the first successor directly if the resulting nodes successor -/// predicates are still dominated by the original entry -RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { - BasicBlock *Entry = Node->getEntry(); +/// \brief Let node exit(s) point to NewExit +void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, + bool IncludeDominator) { - // Skip forward as long as it is just a linear flow - while (true) { - BasicBlock *Entry = Node->getEntry(); - BasicBlock *Exit; + if (Node->isSubRegion()) { + Region *SubRegion = Node->getNodeAs(); + BasicBlock *OldExit = SubRegion->getExit(); + BasicBlock *Dominator = 0; - if (Node->isSubRegion()) { - Exit = Node->getNodeAs()->getExit(); - } else { - TerminatorInst *Term = Entry->getTerminator(); - if (Term->getNumSuccessors() != 1) - break; - Exit = Term->getSuccessor(0); - } + // Find all the edges from the sub region to the exit + for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); + I != E;) { - // It's a back edge, break here so we can insert a loop node - if (!Visited.count(Exit)) - return Node; + BasicBlock *BB = *I++; + if (!SubRegion->contains(BB)) + continue; - // More than node edges are pointing to exit - if (!DT->dominates(Entry, Exit)) - return Node; + // Modify the edges to point to the new exit + delPhiValues(BB, OldExit); + BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); + addPhiValues(BB, NewExit); + + // Find the new dominator (if requested) + if (IncludeDominator) { + if (!Dominator) + Dominator = BB; + else + Dominator = DT->findNearestCommonDominator(Dominator, BB); + } + } - RegionNode *Next = ParentRegion->getNode(Exit); - RNVector::iterator I = std::find(Order.begin(), Order.end(), Next); - assert(I != Order.end()); + // Change the dominator (if requested) + if (Dominator) + DT->changeImmediateDominator(NewExit, Dominator); - Visited.erase(Next->getEntry()); - Order.erase(I); - Node = Next; - } + // Update the region info + SubRegion->replaceExit(NewExit); - BasicBlock *BB = Node->getEntry(); - TerminatorInst *Term = BB->getTerminator(); - if (Term->getNumSuccessors() != 2) - return Node; - - // Our node has exactly two succesors, check if we can handle - // any of them directly - BasicBlock *Succ = Term->getSuccessor(0); - if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) { - Succ = Term->getSuccessor(1); - if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) - return Node; } else { - BasicBlock *Succ2 = Term->getSuccessor(1); - if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] && - dominatesPredicates(Entry, Succ2)) - Succ = Succ2; + BasicBlock *BB = Node->getNodeAs(); + 
killTerminator(BB); + BranchInst::Create(NewExit, BB); + addPhiValues(BB, NewExit); + if (IncludeDominator) + DT->changeImmediateDominator(NewExit, BB); + } } /// \brief Create a new flow node and update dominator tree and region info -BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { +BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) { LLVMContext &Context = Func->getContext(); BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : Order.back()->getEntry(); BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); - DT->addNewBlock(Flow, Prev); + DT->addNewBlock(Flow, Dominator); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); return Flow; } +/// \brief Create a new or reuse the previous node as flow node +BasicBlock *AMDGPUStructurizeCFG::needPrefix(RegionNode *&Prev, + RegionNode *Node) { + + if (!Prev || Prev->isSubRegion() || + (Node && Node->getEntry() == LoopStart)) { + + // We need to insert a flow node, first figure out the dominator + DomTreeNode *Dominator = Prev ? DT->getNode(Prev->getEntry()) : 0; + if (!Dominator) + Dominator = DT->getNode(Node->getEntry())->getIDom(); + assert(Dominator && "Illegal loop to function entry"); + + // then create the flow node + BasicBlock *Flow = getNextFlow(Dominator->getBlock()); + + // wire up the new flow + if (Prev) { + changeExit(Prev, Flow, true); + } else { + // Parent region's entry needs predicates, create a new region entry + BasicBlock *Entry = Node->getEntry(); + for (pred_iterator I = pred_begin(Entry), E = pred_end(Entry); + I != E;) { + + BasicBlock *BB = *(I++); + if (ParentRegion->contains(BB)) + continue; + + // Remove PHI values from outside to our entry node + delPhiValues(BB, Entry); + + // Update the branch instructions + BB->getTerminator()->replaceUsesOfWith(Entry, Flow); + } + + // Populate the region tree with the new entry + for (Region *R = ParentRegion; R && R->getEntry() == Entry; + R = R->getParent()) { + R->replaceEntry(Flow); + } + } + Prev = ParentRegion->getBBNode(Flow); + + } else { + killTerminator(Prev->getEntry()); + } + + return Prev->getEntry(); +} + +/// \brief Returns the region exit if possible, otherwise just a new flow node +BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow, + bool ExitUseAllowed) { + + if (Order.empty() && ExitUseAllowed) { + BasicBlock *Exit = ParentRegion->getExit(); + DT->changeImmediateDominator(Exit, Flow); + addPhiValues(Flow, Exit); + return Exit; + } + return getNextFlow(Flow); +} + +/// \brief Returns the region node for Next, or null if Next is the exit +RegionNode *AMDGPUStructurizeCFG::getNextPrev(BasicBlock *Next) { + return ParentRegion->contains(Next) ? ParentRegion->getBBNode(Next) : 0; +} + +/// \brief Does BB dominate all the predicates of Node ? +bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { + BBPredicates &Preds = Predicates[Node->getEntry()]; + for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + + if (!DT->dominates(BB, PI->first)) + return false; + } + return true; +} + /// \brief Can we predict that this node will always be called?
-bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev, - BasicBlock *Node) { - BBPredicates &Preds = Predicates[Node]; - bool Dominated = false; +bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Who, + RegionNode *Where) { + + BBPredicates &Preds = Predicates[Who->getEntry()]; + bool Dominated = Where == 0; for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { @@ -602,124 +656,88 @@ bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev, if (I->second != BoolTrue) return false; - if (!Dominated && DT->dominates(I->first, Prev)) + if (!Dominated && DT->dominates(I->first, Where->getEntry())) Dominated = true; } + + // TODO: The dominator check is too strict return Dominated; } -/// \brief Wire up the new control flow by inserting or updating the branch -/// instructions at node exits -BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, - RegionNode *Node) { - BasicBlock *Entry = Node->getEntry(); - - if (LoopStart == Entry) - LoopStart = Prev; +/// Take one node from the order vector and wire it up +RegionNode *AMDGPUStructurizeCFG::wireFlow(RegionNode *&Prev, + bool ExitUseAllowed) { - // Wire it up temporary, skipChained may recurse into us - BranchInst::Create(Entry, Prev); - DT->changeImmediateDominator(Entry, Prev); - addPhiValues(Prev, Entry); + RegionNode *Node = Order.pop_back_val(); - Node = skipChained(Node); + if (isPredictableTrue(Node, Prev)) { + // Just a linear flow + if (Prev) { + changeExit(Prev, Node->getEntry(), true); + } + Prev = Node; - BasicBlock *Next = getNextFlow(Prev); - if (!isPredictableTrue(Prev, Entry)) { - // Let Prev point to entry and next block - Prev->getTerminator()->eraseFromParent(); - Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Prev)); } else { - DT->changeImmediateDominator(Next, Entry); - } + // Insert extra prefix node (or reuse last one) + BasicBlock *Flow = needPrefix(Prev, Node); + if (Node->getEntry() == LoopStart) + LoopStart = Flow; - // Let node exit(s) point to next block - if (Node->isSubRegion()) { - Region *SubRegion = Node->getNodeAs(); - BasicBlock *Exit = SubRegion->getExit(); - - // Find all the edges from the sub region to the exit - BBVector ToDo; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { - if (SubRegion->contains(*I)) - ToDo.push_back(*I); - } - - // Modify the edges to point to the new flow block - for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) { - delPhiValues(*I, Exit); - TerminatorInst *Term = (*I)->getTerminator(); - Term->replaceUsesOfWith(Exit, Next); + // Insert extra postfix node (or use exit instead) + BasicBlock *Entry = Node->getEntry(); + BasicBlock *Next = needPostfix(Flow, ExitUseAllowed && Entry != LoopEnd); + + // let it point to entry and next block + Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); + addPhiValues(Flow, Entry); + DT->changeImmediateDominator(Entry, Flow); + + Prev = Node; + while (!Order.empty() && Node->getEntry() != LoopEnd && + !LoopTargets.count(Order.back()->getEntry()) && + dominatesPredicates(Entry, Order.back())) { + Node = wireFlow(Prev, false); } - // Update the region info - SubRegion->replaceExit(Next); - - } else { - BasicBlock *BB = Node->getNodeAs(); - killTerminator(BB); - BranchInst::Create(Next, BB); - - if (BB == LoopEnd) - LoopEnd = 0; + changeExit(Prev, Next, false); + Prev = getNextPrev(Next); } - return Next; + return Node; } -/// Destroy node order and visited map, build up flow order instead. 
/// After this function control flow looks like it should be, but -/// branches only have undefined conditions. +/// branches and PHI nodes only have undefined conditions. void AMDGPUStructurizeCFG::createFlow() { + + BasicBlock *Exit = ParentRegion->getExit(); + bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); + DeletedPhis.clear(); AddedPhis.clear(); + Conditions.clear(); - BasicBlock *Prev = Order.pop_back_val()->getEntry(); - assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); - Visited.erase(Prev); - - if (LoopStart == Prev) { - // Loop starts at entry, split entry so that we can predicate it - BasicBlock::iterator Insert = Prev->getFirstInsertionPt(); - BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName); - DT->addNewBlock(Split, Prev); - ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); - Predicates[Split] = Predicates[Prev]; - Order.push_back(ParentRegion->getBBNode(Split)); - - } else if (LoopStart == Order.back()->getEntry()) { - // Loop starts behind entry, split entry so that we can jump to it - Instruction *Term = Prev->getTerminator(); - BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName); - DT->addNewBlock(Split, Prev); - ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); - Prev = Split; - } + RegionNode *Prev = 0; + while (!Order.empty()) { - killTerminator(Prev); + RegionNode *Node = wireFlow(Prev, EntryDominatesExit); - while (!Order.empty()) { - RegionNode *Node = Order.pop_back_val(); - Visited.erase(Node->getEntry()); - Prev = wireFlowBlock(Prev, Node); - if (LoopStart && !LoopEnd) { - // Create an extra loop end node - LoopEnd = Prev; - Prev = getNextFlow(LoopEnd); - Conditions.push_back(BranchInst::Create(Prev, LoopStart, + // Create an extra loop end node + if (Node->getEntry() == LoopEnd) { + LoopEnd = needPrefix(Prev, 0); + BasicBlock *Next = needPostfix(LoopEnd, EntryDominatesExit); + + Conditions.push_back(BranchInst::Create(Next, LoopStart, BoolUndef, LoopEnd)); addPhiValues(LoopEnd, LoopStart); + Prev = getNextPrev(Next); } } - BasicBlock *Exit = ParentRegion->getExit(); - BranchInst::Create(Exit, Prev); - addPhiValues(Prev, Exit); - if (DT->dominates(ParentRegion->getEntry(), Exit)) - DT->changeImmediateDominator(Exit, Prev); - - assert(Order.empty()); - assert(Visited.empty()); + if (Prev) + changeExit(Prev, Exit, EntryDominatesExit); + else + assert(EntryDominatesExit); } /// Handle a rare case where the disintegrated nodes instructions -- cgit v1.1 From d66bd704cc92fed8292e06018e4f26547d1c96d0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 8 Feb 2013 22:24:40 +0000 Subject: R600: Dump the function name when TargetLowering::LowerCall() fails Also output a more useful error message. 
NOTE: This is a candidate for the Mesa stable branch git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174763 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 927ed09..4b844a3 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -53,6 +53,11 @@ public: const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, DebugLoc DL, SelectionDAG &DAG) const; + virtual SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + CLI.Callee.dump(); + llvm_unreachable("Undefined function"); + } virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From ea871523ecaa99a8fd7f12b5f57e2ee19743362d Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 8 Feb 2013 23:18:37 +0000 Subject: Turn on -new-live-intervals by default. This uses a liveness algorithm that does not depend on data from the LiveVariables analysis, it is the first step towards removing LiveVariables completely. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174774 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 4198457..555dcc6 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -40,10 +40,10 @@ #include using namespace llvm; -// Switch to the new experimental algorithm for computing live intervals. +// Switch to the new algorithm for computing live intervals. static cl::opt -NewLiveIntervals("new-live-intervals", cl::Hidden, - cl::desc("Use new algorithm forcomputing live intervals")); +NewLiveIntervals("new-live-intervals", cl::Hidden, cl::init(true), + cl::desc("Use new algorithm for computing live intervals")); char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; -- cgit v1.1 From 68b2faf6be3a08064687a67a19efee0a713435de Mon Sep 17 00:00:00 2001 From: Sergei Larin Date: Fri, 8 Feb 2013 23:37:41 +0000 Subject: Enable *BasicBlockPass::createPrinterPass() Enables raw_ostream I/O for BasicBlockPass. 
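Usage of the new printer, matching the factory signature added below (the declaration is provided inline here because the corresponding header change falls outside this log's "(limited to 'lib')" filter, so its exact location is an assumption):

    #include <string>
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/raw_ostream.h"

    namespace llvm {
      // Mirrors the definition in lib/IR/PrintModulePass.cpp below.
      BasicBlockPass *createPrintBasicBlockPass(raw_ostream *OS,
                                                bool DeleteStream,
                                                const std::string &Banner);
    }

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("example", Ctx);
      PassManager PM;
      PM.add(createPrintBasicBlockPass(&errs(), /*DeleteStream=*/false,
                                       "; block:"));
      PM.run(M); // each basic block in M is printed, preceded by the banner
      return 0;
    }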
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174776 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Core.cpp | 1 + lib/IR/Pass.cpp | 3 +-- lib/IR/PrintModulePass.cpp | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index aaf661f..b696ed0 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -39,6 +39,7 @@ void llvm::initializeCore(PassRegistry &Registry) { initializeDominatorTreePass(Registry); initializePrintModulePassPass(Registry); initializePrintFunctionPassPass(Registry); + initializePrintBasicBlockPassPass(Registry); initializeVerifierPass(Registry); initializePreVerifierPass(Registry); } diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp index ec448e6..7fc4828 100644 --- a/lib/IR/Pass.cpp +++ b/lib/IR/Pass.cpp @@ -143,8 +143,7 @@ PassManagerType FunctionPass::getPotentialPassManagerType() const { Pass *BasicBlockPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { - - llvm_unreachable("BasicBlockPass printing unsupported."); + return createPrintBasicBlockPass(&O, false, Banner); } bool BasicBlockPass::doInitialization(Function &) { diff --git a/lib/IR/PrintModulePass.cpp b/lib/IR/PrintModulePass.cpp index e4e9939..5026bc2 100644 --- a/lib/IR/PrintModulePass.cpp +++ b/lib/IR/PrintModulePass.cpp @@ -73,6 +73,31 @@ namespace { AU.setPreservesAll(); } }; + + class PrintBasicBlockPass : public BasicBlockPass { + std::string Banner; + raw_ostream *Out; // raw_ostream to print on + bool DeleteStream; // Delete the ostream in our dtor? + public: + static char ID; + PrintBasicBlockPass() : BasicBlockPass(ID), Out(&dbgs()), + DeleteStream(false) {} + PrintBasicBlockPass(const std::string &B, raw_ostream *o, bool DS) + : BasicBlockPass(ID), Banner(B), Out(o), DeleteStream(DS) {} + + ~PrintBasicBlockPass() { + if (DeleteStream) delete Out; + } + + bool runOnBasicBlock(BasicBlock &BB) { + (*Out) << Banner << BB; + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; } char PrintModulePass::ID = 0; @@ -81,6 +106,9 @@ INITIALIZE_PASS(PrintModulePass, "print-module", char PrintFunctionPass::ID = 0; INITIALIZE_PASS(PrintFunctionPass, "print-function", "Print function to stderr", false, false) +char PrintBasicBlockPass::ID = 0; +INITIALIZE_PASS(PrintBasicBlockPass, "print-bb", + "Print BB to stderr", false, false) /// createPrintModulePass - Create and return a pass that writes the /// module to the specified raw_ostream. @@ -98,3 +126,11 @@ FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner, return new PrintFunctionPass(Banner, OS, DeleteStream); } +/// createPrintBasicBlockPass - Create and return a pass that writes the +/// BB to the specified raw_ostream. +BasicBlockPass *llvm::createPrintBasicBlockPass(llvm::raw_ostream *OS, + bool DeleteStream, + const std::string &Banner) { + return new PrintBasicBlockPass(Banner, OS, DeleteStream); +} + -- cgit v1.1 From ec7b25d753867f43e2fbd00e58daf75e29783bd4 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 9 Feb 2013 00:04:07 +0000 Subject: Remove the old liveness algorithm. This is part of the plan to delete LiveVariables. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174783 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 313 +---------------------------------- 1 file changed, 5 insertions(+), 308 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 555dcc6..0d4ec11 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -40,11 +40,6 @@ #include using namespace llvm; -// Switch to the new algorithm for computing live intervals. -static cl::opt -NewLiveIntervals("new-live-intervals", cl::Hidden, cl::init(true), - cl::desc("Use new algorithm for computing live intervals")); - char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -60,6 +55,9 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); + // LiveVariables isn't really required by this analysis, it is only required + // here to make sure it is live during TwoAddressInstructionPass and + // PHIElimination. This is temporary. AU.addRequired(); AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); @@ -105,7 +103,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); AA = &getAnalysis(); - LV = &getAnalysis(); Indexes = &getAnalysis(); DomTree = &getAnalysis(); if (!LRCalc) @@ -114,16 +111,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); - if (NewLiveIntervals) { - // This is the new way of computing live intervals. - // It is independent of LiveVariables, and it can run at any time. - computeVirtRegs(); - computeRegMasks(); - } else { - // This is the old way of computing live intervals. - // It depends on LiveVariables. - computeIntervals(); - } + computeVirtRegs(); + computeRegMasks(); computeLiveInRegUnits(); DEBUG(dump()); @@ -165,298 +154,6 @@ void LiveIntervals::dumpInstrs() const { } #endif -static -bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { - unsigned Reg = MI.getOperand(MOIdx).getReg(); - for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg()) - continue; - if (MO.getReg() == Reg && MO.isDef()) { - assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && - MI.getOperand(MOIdx).getSubReg() && - (MO.getSubReg() || MO.isImplicit())); - return true; - } - } - return false; -} - -/// isPartialRedef - Return true if the specified def at the specific index is -/// partially re-defining the specified live interval. A common case of this is -/// a definition of the sub-register. 
-bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, - LiveInterval &interval) { - if (!MO.getSubReg() || MO.isEarlyClobber()) - return false; - - SlotIndex RedefIndex = MIIdx.getRegSlot(); - const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); - MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); - if (DefMI != 0) { - return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; - } - return false; -} - -void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, - MachineOperand& MO, - unsigned MOIdx, - LiveInterval &interval) { - DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI)); - - // Virtual registers may be defined multiple times (due to phi - // elimination and 2-addr elimination). Much of what we do only has to be - // done once for the vreg. We use an empty interval to detect the first - // time we see a vreg. - LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg); - if (interval.empty()) { - // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - - // Make sure the first definition is not a partial redefinition. - assert(!MO.readsReg() && "First def cannot also read virtual register " - "missing flag?"); - - VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); - assert(ValNo->id == 0 && "First value in interval is not 0?"); - - // Loop over all of the blocks that the vreg is defined in. There are - // two cases we have to handle here. The most common case is a vreg - // whose lifetime is contained within a basic block. In this case there - // will be a single kill, in MBB, which comes after the definition. - if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) { - // FIXME: what about dead vars? - SlotIndex killIdx; - if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); - else - killIdx = defIndex.getDeadSlot(); - - // If the kill happens after the definition, we have an intra-block - // live range. - if (killIdx > defIndex) { - assert(vi.AliveBlocks.empty() && - "Shouldn't be alive across any blocks!"); - LiveRange LR(defIndex, killIdx, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << "\n"); - return; - } - } - - // The other case we handle is when a virtual register lives to the end - // of the defining block, potentially live across some blocks, then is - // live into some number of blocks, but gets killed. Start by adding a - // range that goes from this definition to the end of the defining block. - LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo); - DEBUG(dbgs() << " +" << NewLR); - interval.addRange(NewLR); - - bool PHIJoin = LV->isPHIJoin(interval.reg); - - if (PHIJoin) { - // A phi join register is killed at the end of the MBB and revived as a - // new valno in the killing blocks. - assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); - DEBUG(dbgs() << " phi-join"); - } else { - // Iterate over all of the blocks that the variable is completely - // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the - // live interval. 
- for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), - E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), - ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); - } - } - - // Finally, this virtual register is live from the start of any killing - // block to the 'use' slot of the killing instruction. - for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { - MachineInstr *Kill = vi.Kills[i]; - SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); - - // Create interval with one of a NEW value number. Note that this value - // number isn't actually defined by an instruction, weird huh? :) - if (PHIJoin) { - assert(getInstructionFromIndex(Start) == 0 && - "PHI def index points at actual instruction."); - ValNo = interval.getNextValue(Start, VNInfoAllocator); - } - LiveRange LR(Start, killIdx, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); - } - - } else { - if (MultipleDefsBySameMI(*mi, MOIdx)) - // Multiple defs of the same virtual register by the same instruction. - // e.g. %reg1031:5, %reg1031:6 = VLD1q16 %reg1024, ... - // This is likely due to elimination of REG_SEQUENCE instructions. Return - // here since there is nothing to do. - return; - - // If this is the second time we see a virtual register definition, it - // must be due to phi elimination or two addr elimination. If this is - // the result of two address elimination, then the vreg is one of the - // def-and-use register operand. - - // It may also be partial redef like this: - // 80 %reg1041:6 = VSHRNv4i16 %reg1034, 12, pred:14, pred:%reg0 - // 120 %reg1041:5 = VSHRNv4i16 %reg1039, 12, pred:14, pred:%reg0 - bool PartReDef = isPartialRedef(MIIdx, MO, interval); - if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { - // If this is a two-address definition, then we have already processed - // the live range. The only problem is that we didn't realize there - // are actually two values in the live interval. Because of this we - // need to take the LiveRegion that defines this register and split it - // into two values. - SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - - const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); - VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getRegSlot(); - - // Delete the previous value, which should be short and continuous, - // because the 2-addr copy must be in the same MBB as the redef. - interval.removeRange(DefIndex, RedefIndex); - - // The new value number (#1) is defined by the instruction we claimed - // defined value #0. - VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); - - // Value#0 is now defined by the 2-addr instruction. - OldValNo->def = RedefIndex; - - // Add the new live interval which replaces the range for the input copy. - LiveRange LR(DefIndex, RedefIndex, ValNo); - DEBUG(dbgs() << " replace range with " << LR); - interval.addRange(LR); - - // If this redefinition is dead, we need to add a dummy unit live - // range covering the def slot. - if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), - OldValNo)); - - DEBUG(dbgs() << " RESULT: " << interval); - } else if (LV->isPHIJoin(interval.reg)) { - // In the case of PHI elimination, each variable definition is only - // live until the end of the block. 
We've already taken care of the - // rest of the live range. - - SlotIndex defIndex = MIIdx.getRegSlot(); - if (MO.isEarlyClobber()) - defIndex = MIIdx.getRegSlot(true); - - VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); - - SlotIndex killIndex = getMBBEndIdx(mbb); - LiveRange LR(defIndex, killIndex, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " phi-join +" << LR); - } else { - llvm_unreachable("Multiply defined register"); - } - } - - DEBUG(dbgs() << '\n'); -} - -void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MI, - SlotIndex MIIdx, - MachineOperand& MO, - unsigned MOIdx) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) - handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, - getOrCreateInterval(MO.getReg())); -} - -/// computeIntervals - computes the live intervals for virtual -/// registers. for some ordering of the machine instructions [1,N] a -/// live interval is an interval [i, j) where 1 <= i <= j < N for -/// which a variable is live -void LiveIntervals::computeIntervals() { - DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " << MF->getName() << '\n'); - - RegMaskBlocks.resize(MF->getNumBlockIDs()); - - SmallVector UndefUses; - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); - - if (MBB->empty()) - continue; - - // Track the index of the current machine instr. - SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(dbgs() << "BB#" << MBB->getNumber() - << ":\t\t# derived from " << MBB->getName() << "\n"); - - // Skip over empty initial indices. - if (getInstructionFromIndex(MIIndex) == 0) - MIIndex = Indexes->getNextNonNullIndex(MIIndex); - - for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); - MI != miEnd; ++MI) { - DEBUG(dbgs() << MIIndex << "\t" << *MI); - if (MI->isDebugValue()) - continue; - assert(Indexes->getInstructionFromIndex(MIIndex) == MI && - "Lost SlotIndex synchronization"); - - // Handle defs. - for (int i = MI->getNumOperands() - 1; i >= 0; --i) { - MachineOperand &MO = MI->getOperand(i); - - // Collect register masks. - if (MO.isRegMask()) { - RegMaskSlots.push_back(MIIndex.getRegSlot()); - RegMaskBits.push_back(MO.getRegMask()); - continue; - } - - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - // handle register defs - build intervals - if (MO.isDef()) - handleRegisterDef(MBB, MI, MIIndex, MO, i); - else if (MO.isUndef()) - UndefUses.push_back(MO.getReg()); - } - - // Move to the next instr slot. - MIIndex = Indexes->getNextNonNullIndex(MIIndex); - } - - // Compute the number of register mask instructions in this block. - std::pair &RMB = RegMaskBlocks[MBB->getNumber()]; - RMB.second = RegMaskSlots.size() - RMB.first; - } - - // Create empty intervals for registers defined by implicit_def's (except - // for those implicit_def that define values which are liveout of their - // blocks. - for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { - unsigned UndefReg = UndefUses[i]; - (void)getOrCreateInterval(UndefReg); - } -} - LiveInterval* LiveIntervals::createInterval(unsigned reg) { float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? 
HUGE_VALF : 0.0F; return new LiveInterval(reg, Weight); -- cgit v1.1 From f2d89ff5c82c78f6160a9a8611c525771fdd2033 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Sat, 9 Feb 2013 00:41:44 +0000 Subject: Dwarf: do not use line_table_start in at_stmt_list since we do not always emit line table entries in assembly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174785 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5d19a8d..6d3759d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -670,9 +670,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym); + NewCU->getUniqueID() == 0 ? + Asm->GetTempSymbol("section_line") : LineTableStartSym); else if (NewCU->getUniqueID() == 0) NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); else -- cgit v1.1 From 4fa57932c7b13ec42c563e33a2e40fd04194b64e Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 9 Feb 2013 01:04:28 +0000 Subject: Remove #includes from the commonly used LoopInfo.h. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174786 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopInstSimplify.cpp | 1 + lib/Transforms/Scalar/LoopStrengthReduce.cpp | 1 + lib/Transforms/Utils/CodeExtractor.cpp | 1 + 3 files changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index c48808f..a23860a 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 9237077..b83bec2 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 3a21528..f7c659f 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" -- cgit v1.1 From 6050edfe3e66ac45dbfaee72422b332ecaabb2ae Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 9 Feb 2013 01:11:01 +0000 Subject: LSR IVChain improvement. 
Handle chains in which the same offset is used for both loads and stores to the same array. Fixes rdar://11410078. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174789 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b83bec2..5847dfe 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2537,6 +2537,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Add this IV user to the end of the chain. IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } + IVChain &Chain = IVChainVec[ChainIdx]; SmallPtrSet &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2554,8 +2555,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast(*UseIter); - if (!OtherUse || OtherUse == UserInst) + if (!OtherUse) continue; + // Uses in the chain will no longer be uses if the chain is formed. + // Include the head of the chain in this iteration (not Chain.begin()). + IVChain::const_iterator IncIter = Chain.Incs.begin(); + IVChain::const_iterator IncEnd = Chain.Incs.end(); + for( ; IncIter != IncEnd; ++IncIter) { + if (IncIter->UserInst == OtherUse) + break; + } + if (IncIter != IncEnd) + continue; + if (SE.isSCEVable(OtherUse->getType()) && !isa(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { -- cgit v1.1 From b24f5b7c0838f22abc6f1ba5de2a17d25293cd17 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 9 Feb 2013 07:07:29 +0000 Subject: Fix the underlying problem that was causing read(0) to be called: sometimes the bitcode writer would generate abbrev records saying that the abbrev should be filled with fixed zero-bit bitfields (this happens in the .bc writer when the number of types used in a module is exactly one, since log2(1) == 0). In this case, just handle it as a literal zero. We can't "just fix" the writer without breaking compatibility with existing bc files, so have the abbrev reader do the substitution. Strengthen the assert in read to reject reads of zero bits so we catch such crimes in the future, and remove the special case designed to handle this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174801 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 85076f3..b133502 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -288,9 +288,20 @@ void BitstreamCursor::ReadAbbrevRecord() { } BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); - if (BitCodeAbbrevOp::hasEncodingData(E)) - Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5))); - else + if (BitCodeAbbrevOp::hasEncodingData(E)) { + unsigned Data = ReadVBR64(5); + + // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) + // and vbr(0) as a literal zero. This is decoded the same way, and avoids + // a slow path in Read() to have to handle reading zero bits. 
+ if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && + Data == 0) { + Abbv->Add(BitCodeAbbrevOp(0)); + continue; + } + + Abbv->Add(BitCodeAbbrevOp(E, Data)); + } else Abbv->Add(BitCodeAbbrevOp(E)); } CurAbbrevs.push_back(Abbv); -- cgit v1.1 From 00d9da1ac45429cb69c8b298b9f25e10a4b57813 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Sat, 9 Feb 2013 13:34:15 +0000 Subject: [NVPTX] Make address space errors more explicit (llvm_unreachable -> report_fatal_error) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174808 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 22da8f3..3f99d1d 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1314,7 +1314,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, O << "shared" ; break; default: - llvm_unreachable("unexpected address space"); + report_fatal_error("Bad address space found while emitting PTX"); + break; } } -- cgit v1.1 From 0b77866f938315f5d21ad5dce774482528b8835d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 9 Feb 2013 15:48:49 +0000 Subject: TEMPORARY SYNTAX CHANGE! The original syntax for the attribute groups was ambiguous. For example: declare void @foo() #1 #0 = attributes { noinline } The '#0' would be parsed as an attribute reference for '@foo' and not as a top-level entity. In order to continue forward while waiting for a decision on what the correct syntax is, I'm changing it to this instead: declare void @foo() #1 attributes #0 = { noinline } Repeat: This is TEMPORARY until we decide what the correct syntax should be. 
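For illustration, a minimal standalone sketch of why the keyword-first form is easier to parse (the token names are invented; this is not the real LLParser): once 'attributes' must lead the production, the top-level dispatch can classify a construct with one token of lookahead, whereas a leading '#N' could have begun either a definition or a reference.

  #include <cassert>

  enum TokKind { kw_attributes, AttrGrpID, GlobalVar };

  // New grammar: only the 'attributes' keyword can begin a group
  // definition, so a top-level '#N' needs no extra lookahead.
  bool startsGroupDefinition(TokKind Tok) {
    return Tok == kw_attributes;
  }

  int main() {
    assert(startsGroupDefinition(kw_attributes)); // attributes #0 = { ... }
    assert(!startsGroupDefinition(AttrGrpID));    // '#0 = ...' is rejected
  }
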
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174813 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 6f076e2..76eb596 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -233,7 +233,6 @@ bool LLParser::ParseTopLevelEntities() { case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break; case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break; - case lltok::AttrGrpID: if (ParseUnnamedAttrGrp()) return true; break; // The Global variable production with no name can have many different // optional leading prefixes, the production is: @@ -279,6 +278,8 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_global: // GlobalType if (ParseGlobal("", SMLoc(), 0, false, 0)) return true; break; + + case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break; } } } @@ -800,16 +801,18 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, } /// ParseUnnamedAttrGrp -/// ::= AttrGrpID '=' '{' AttrValPair+ '}' +/// ::= 'attributes' AttrGrpID '=' '{' AttrValPair+ '}' bool LLParser::ParseUnnamedAttrGrp() { - assert(Lex.getKind() == lltok::AttrGrpID); + assert(Lex.getKind() == lltok::kw_attributes); LocTy AttrGrpLoc = Lex.getLoc(); + Lex.Lex(); + + assert(Lex.getKind() == lltok::AttrGrpID); unsigned VarID = Lex.getUIntVal(); std::vector unused; Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' here") || - ParseToken(lltok::kw_attributes, "expected 'attributes' keyword here") || ParseToken(lltok::lbrace, "expected '{' here") || ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true) || ParseToken(lltok::rbrace, "expected end of attribute group")) -- cgit v1.1 From 7dcb23a0522eb23c3a50f6c8249f10ccdf214993 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 9 Feb 2013 20:54:05 +0000 Subject: Remove unneeded "TargetMachine.h" #includes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174817 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AllocationOrder.cpp | 1 - lib/CodeGen/AsmPrinter/ARMException.cpp | 1 - lib/CodeGen/AsmPrinter/DwarfException.cpp | 1 - lib/CodeGen/AsmPrinter/Win64Exception.cpp | 1 - lib/CodeGen/TargetRegisterInfo.cpp | 1 - 5 files changed, 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 94754a0..1d09d20 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 1728331..9310069 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 8e53900..7133458 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index cb25674..1561012 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 9b776d1..84b4bfc 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; -- cgit v1.1 From 85b3fbecdfe934ac7519a8831c4bd262cba99d12 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 05:00:40 +0000 Subject: Add accessor for the LLVMContext. 
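A minimal usage sketch, assuming the surrounding AttributeSet API of this period (the caller and the choice of attribute are invented for illustration):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"

  // With the accessor, code that already holds an AttributeSet no longer
  // needs an LLVMContext& threaded in from its caller.
  void addNoUnwind(llvm::Function &F) {
    llvm::AttributeSet AS = F.getAttributes();
    llvm::LLVMContext &Ctx = AS.getContext(); // the new accessor
    F.setAttributes(AS.addAttribute(Ctx, llvm::AttributeSet::FunctionIndex,
                                    llvm::Attribute::NoUnwind));
  }
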
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174824 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 99fafae..04e95ef 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -709,6 +709,10 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, // AttributeSet Accessor Methods //===----------------------------------------------------------------------===// +LLVMContext &AttributeSet::getContext() const { + return pImpl->getContext(); +} + AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { return pImpl && hasAttributes(Idx) ? AttributeSet::get(pImpl->getContext(), -- cgit v1.1 From fe0fd35d5339467fedd59f0cf5bdadb163a8d766 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 10 Feb 2013 06:42:30 +0000 Subject: Make LiveVariables an instance variable of PHIElimination. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174828 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 4daa211..b5f044e 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -42,6 +42,7 @@ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information + LiveVariables *LV; public: static char ID; // Pass identification, replacement for typeid @@ -70,7 +71,7 @@ namespace { /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI); + MachineLoopInfo *MLI); typedef std::pair BBVRegPair; typedef DenseMap VRegPHIUse; @@ -110,6 +111,7 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); + LV = getAnalysisIfAvailable(); bool Changed = false; @@ -117,12 +119,10 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI->leaveSSA(); // Split critical edges to help the coalescer - if (!DisableEdgeSplitting) { - if (LiveVariables *LV = getAnalysisIfAvailable()) { - MachineLoopInfo *MLI = getAnalysisIfAvailable(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV, MLI); - } + if (!DisableEdgeSplitting && LV) { + MachineLoopInfo *MLI = getAnalysisIfAvailable(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, MLI); } // Populate VRegPHIUseCount @@ -244,7 +244,6 @@ void PHIElimination::LowerAtomicPHINode( } // Update live variable information if there is any. - LiveVariables *LV = getAnalysisIfAvailable(); if (LV) { MachineInstr *PHICopy = prior(AfterPHIsIt); @@ -418,7 +417,6 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. @@ -450,7 +448,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. 
// // If the copy would be a kill, there is no need to split the edge. - if (!LV.isLiveOut(Reg, *PreMBB)) + if (!LV->isLiveOut(Reg, *PreMBB)) continue; DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" @@ -465,7 +463,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !LV.isLiveIn(Reg, MBB); + bool ShouldSplit = !LV->isLiveIn(Reg, MBB); // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { -- cgit v1.1 From 02513c05c6333e2f7418b1327eded162b2791828 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 10 Feb 2013 06:42:32 +0000 Subject: Remove ancient references to 'atomic' phis in PHIElimination that don't really make sense anymore. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174829 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b5f044e..3f459b7 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -58,8 +58,8 @@ namespace { /// in predecessor basic blocks. /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + void LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -88,7 +88,7 @@ namespace { }; } -STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumLowered, "Number of phis lowered"); STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); @@ -166,7 +166,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); while (MBB.front().isPHI()) - LowerAtomicPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, AfterPHIsIt); return true; } @@ -193,15 +193,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, } -/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, -/// under the assumption that it needs to be lowered in a way that supports -/// atomic execution of PHIs. This lowering method is always correct all of the -/// time. +/// LowerPHINode - Lower the PHI node at the top of the specified block, /// -void PHIElimination::LowerAtomicPHINode( - MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { - ++NumAtomic; +void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt) { + ++NumLowered; // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); -- cgit v1.1 From d7c7a686ac18f8f3c035d158d8b713077be83ea4 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 10 Feb 2013 06:42:34 +0000 Subject: Fix a typo. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/Passes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 02a1491..b79f9f9 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -89,7 +89,7 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); -// Experimental option to run live inteerval analysis early. +// Experimental option to run live interval analysis early. static cl::opt EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); -- cgit v1.1 From b7cfac32f32f17e64a5addfdb833702160650f14 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 10 Feb 2013 06:42:36 +0000 Subject: Add support for updating LiveIntervals to PHIElimination. If LiveIntervals are present, it currently verifies them with the MachineVerifier, and this passed all of the test cases in 'make check' (when accounting for existing verifier errors). There were some assertion failures in the two-address pass, but they also happened on code without phis and look like they are caused by different kill flags from LiveIntervals. The only part that doesn't work is the critical edge splitting heuristic, because there isn't currently an efficient way to update LiveIntervals after splitting an edge. I'll probably start by implementing the slow fallback and test that it works before tackling the fast path for single-block ranges. The existing code that updates LiveVariables is fairly slow as it is. There isn't a command-line option for enabling this; instead, just edit PHIElimination.cpp to require LiveIntervals. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174831 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 171 ++++++++++++++++++++++++++++++++++------- 1 file changed, 142 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 3f459b7..b952aab 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" @@ -43,6 +44,7 @@ namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information LiveVariables *LV; + LiveIntervals *LIS; public: static char ID; // Pass identification, replacement for typeid @@ -104,6 +106,7 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); @@ -112,14 +115,16 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); LV = getAnalysisIfAvailable(); + LIS = getAnalysisIfAvailable(); bool Changed = false; // This pass takes the function out of SSA form. MRI->leaveSSA(); - // Split critical edges to help the coalescer - if (!DisableEdgeSplitting && LV) { + // Split critical edges to help the coalescer. 
This does not yet support + // updating LiveIntervals, so we disable it. + if (!DisableEdgeSplitting && LV && !LIS) { MachineLoopInfo *MLI = getAnalysisIfAvailable(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Changed |= SplitPHIEdges(MF, *I, MLI); @@ -137,19 +142,28 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { E = ImpDefs.end(); I != E; ++I) { MachineInstr *DefMI = *I; unsigned DefReg = DefMI->getOperand(0).getReg(); - if (MRI->use_nodbg_empty(DefReg)) + if (MRI->use_nodbg_empty(DefReg)) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); + } } // Clean up the lowered PHI instructions. for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); - I != E; ++I) + I != E; ++I) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(I->first); MF.DeleteMachineInstr(I->first); + } LoweredPHIs.clear(); ImpDefs.clear(); VRegPHIUseCount.clear(); + if (LIS) + MF.verify(this, "After PHI elimination"); + return Changed; } @@ -278,6 +292,47 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } + // Update LiveIntervals for the new copy or implicit def. + if (LIS) { + MachineInstr *NewInstr = prior(AfterPHIsIt); + LIS->InsertMachineInstrInMaps(NewInstr); + + SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); + SlotIndex DestCopyIndex = LIS->getInstructionIndex(NewInstr); + if (IncomingReg) { + // Add the region from the beginning of MBB to the copy instruction to + // IncomingReg's live interval. + LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg); + VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); + if (!IncomingVNI) + IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, + LIS->getVNInfoAllocator()); + IncomingLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); + } + + LiveInterval &DestLI = LIS->getOrCreateInterval(DestReg); + if (NewInstr->getOperand(0).isDead()) { + // A dead PHI's live range begins and ends at the start of the MBB, but + // the lowered copy, which will still be dead, needs to begin and end at + // the copy instruction. + VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); + assert(OrigDestVNI && "PHI destination should be live at block entry."); + DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.createDeadDef(DestCopyIndex.getRegSlot(), + LIS->getVNInfoAllocator()); + DestLI.removeValNo(OrigDestVNI); + } else { + // Otherwise, remove the region from the beginning of MBB to the copy + // instruction from DestReg's live interval. + DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); + assert(DestVNI && "PHI destination should be live at its definition."); + DestVNI->def = DestCopyIndex.getRegSlot(); + } + } + // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), @@ -310,45 +365,44 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. + MachineInstr *NewSrcInstr = 0; if (!reusedIncoming && IncomingReg) { if (SrcUndef) { // The source register is undefined, so there is no need for a real // COPY, but we still need to ensure joint dominance by defs. // Insert an IMPLICIT_DEF instruction. 
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), + IncomingReg); // Clean up the old implicit-def, if there even was one. if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg) - .addReg(SrcReg, 0, SrcSubReg); + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg) + .addReg(SrcReg, 0, SrcSubReg); } } - // Now update live variable information if we have it. Otherwise we're done - if (SrcUndef || !LV) continue; - - // We want to be able to insert a kill of the register if this PHI (aka, the - // copy we just inserted) is the last use of the source value. Live - // variable analysis conservatively handles this by saying that the value is - // live until the end of the block the PHI entry lives in. If the value - // really is dead at the PHI copy, there will be no successor blocks which - // have the value live-in. - - // Also check to see if this register is in use by another PHI node which - // has not yet been eliminated. If so, it will be killed at an appropriate - // point later. - - // Is it used by any PHI instructions in this block? - bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; + // We only need to update the LiveVariables kill of SrcReg if this was the + // last PHI use of SrcReg to be lowered on this CFG edge and it is not live + // out of the predecessor. We can also ignore undef sources. + if (LV && !SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] && + !LV->isLiveOut(SrcReg, opBlock)) { + // We want to be able to insert a kill of the register if this PHI (aka, + // the copy we just inserted) is the last use of the source value. Live + // variable analysis conservatively handles this by saying that the value + // is live until the end of the block the PHI entry lives in. If the value + // really is dead at the PHI copy, there will be no successor blocks which + // have the value live-in. + + // Okay, if we now know that the value is not live out of the block, we + // can add a kill marker in this block saying that it kills the incoming + // value! - // Okay, if we now know that the value is not live out of the block, we can - // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, terminator // instructions at the end of the block may also use the value. 
In this @@ -389,11 +443,70 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, unsigned opBlockNum = opBlock.getNumber(); LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); } + + if (LIS) { + if (NewSrcInstr) { + LIS->InsertMachineInstrInMaps(NewSrcInstr); + LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + } + + if (!SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) { + LiveInterval &SrcLI = LIS->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + SE = opBlock.succ_end(); SI != SE; ++SI) { + if (SrcLI.liveAt(LIS->getMBBStartIdx(*SI))) { + isLiveOut = true; + break; + } + } + + if (!isLiveOut) { + MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't just insert a copy. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && + "Cannot find kill instruction"); + + SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); + SrcLI.removeRange(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); + } + } + } } // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. - if (reusedIncoming || !IncomingReg) + if (reusedIncoming || !IncomingReg) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MPhi); MF.DeleteMachineInstr(MPhi); + } } /// analyzePHINodes - Gather information about the PHI nodes in here. In -- cgit v1.1 From 87de71cb9f12d874e88d4f314ab245985c1b36bc Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 10:12:06 +0000 Subject: Add 'empty' query methods to the builder and use them in the verifier. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174832 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Verifier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index babc295..31312dc 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -744,7 +744,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); NotFn.removeFunctionOnlyAttrs(); - Assert1(!NotFn.hasAttributes(), "Attribute '" + + Assert1(NotFn.empty(), "Attributes '" + AttributeSet::get(V->getContext(), AttributeSet::FunctionIndex, NotFn).getAsString(AttributeSet::FunctionIndex) + -- cgit v1.1 From 0f7422057e7cf0426f5bb293107b756b5de80523 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 10:12:50 +0000 Subject: Use a 'continue' here to stop from double lexing. 
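The pattern, reduced to a standalone sketch (the tokens are invented, not the LLParser's): in a loop whose shared tail advances the lexer once per iteration, a case that has already consumed its own tokens must 'continue' past that tail, otherwise one token is silently skipped.

  #include <cassert>
  #include <vector>

  int main() {
    std::vector<char> Toks{'k', '=', 'v', 'x', '}'};
    int Pos = 0;                 // index of the current token
    auto lex = [&] { ++Pos; };   // advance to the next token

    int Pairs = 0, Singles = 0;
    while (Toks[Pos] != '}') {
      switch (Toks[Pos]) {
      case 'k':      // a "key = value" triple: this case consumes
        lex();       //   the '=' ...
        lex();       //   ... and the value ...
        lex();       //   ... and is now already on the next token,
        ++Pairs;
        continue;    // so 'break' here would also run the shared lex()
                     // below and silently drop the token 'x'
      default:
        ++Singles;
        break;
      }
      lex();         // shared advance for single-token cases
    }
    assert(Pairs == 1 && Singles == 1);
  }
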
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174833 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 76eb596..9a76007 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -869,7 +869,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, return true; B.addAttribute(Attr, Val); - break; + continue; } // Target-independent attributes: -- cgit v1.1 From 09ed9101c8c7e93c1d814e75ff906bf904778dbb Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 10:13:23 +0000 Subject: Handle string attributes in the AttrBuilder. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174834 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 04e95ef..e64603c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -879,7 +879,11 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { - // FIXME: Handle string attributes. + if (Attr.isStringAttribute()) { + addAttribute(Attr.getKindAsString(), Attr.getValueAsString()); + return *this; + } + Attribute::AttrKind Kind = Attr.getKindAsEnum(); Attrs.insert(Kind); -- cgit v1.1 From a311c526ed1da64c450bb8842630f6f80c691eca Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 10 Feb 2013 17:57:33 +0000 Subject: Test Commit - Remove some trailing whitespace in R600Instructions.td git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174839 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 2eab765..591f66d 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -70,7 +70,7 @@ class InstFlag let PrintMethod = PM; } -// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers +// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers def SEL : OperandWithDefaultOps { let PrintMethod = "printSel"; } @@ -681,7 +681,7 @@ class ExportBufInst : InstR600ISA<( let Inst{63-32} = Word1; } -let Predicates = [isR600toCayman] in { +let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// // Common Instructions R600, R700, Evergreen, Cayman @@ -1199,7 +1199,7 @@ let Predicates = [isR700] in { //===----------------------------------------------------------------------===// let Predicates = [isEG] in { - + def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; defm DIV_eg : DIV_Common; @@ -1450,7 +1450,7 @@ class VTX_READ_32_eg buffer_id, list pattern> // This is not really necessary, but there were some GPU hangs that appeared // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. + // to the $ptr registers of the VTX_READ. // e.g. 
// %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 // %T2_X = MOV %ZERO @@ -1529,7 +1529,7 @@ defm R600_ : RegisterLoadStore ; let Predicates = [isCayman] in { -let isVector = 1 in { +let isVector = 1 in { def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; @@ -1811,7 +1811,7 @@ def : Pat < // SGE Reverse args def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE R600_Reg32:$src1, R600_Reg32:$src0) + (SGE R600_Reg32:$src1, R600_Reg32:$src0) >; // SETGT_DX10 reverse args -- cgit v1.1 From 8c2e77f895301967bf37d04e905fb1f069ec91b2 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 23:06:02 +0000 Subject: Add support for attribute groups in the value enumerator. Attribute groups are essentially all AttributeSets which are used by the program. Enumerate them here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174844 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/ValueEnumerator.cpp | 10 ++++++++++ lib/Bitcode/Writer/ValueEnumerator.h | 14 ++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'lib') diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index b2f7875..9f7e17b 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -427,6 +427,16 @@ void ValueEnumerator::EnumerateAttributes(const AttributeSet &PAL) { Attribute.push_back(PAL); Entry = Attribute.size(); } + + // Do lookups for all attribute groups. + for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { + AttributeSet AS = PAL.getSlotAttributes(i); + unsigned &Entry = AttributeSetMap[AS]; + if (Entry == 0) { + AttributeSets.push_back(AS); + Entry = AttributeSets.size(); + } + } } void ValueEnumerator::incorporateFunction(const Function &F) { diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 2d3d570..6e91d68 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -52,6 +52,10 @@ private: SmallVector FunctionLocalMDs; ValueMapType MDValueMap; + typedef DenseMap AttributeSetMapType; + AttributeSetMapType AttributeSetMap; + std::vector AttributeSets; + typedef DenseMap AttributeMapType; AttributeMapType AttributeMap; std::vector Attribute; @@ -105,6 +109,13 @@ public: return I->second; } + unsigned getAttributeSetID(const AttributeSet &PAL) const { + if (PAL.isEmpty()) return 0; // Null maps to zero. + AttributeSetMapType::const_iterator I = AttributeSetMap.find(PAL); + assert(I != AttributeSetMap.end() && "Attribute not in ValueEnumerator!"); + return I->second; + } + /// getFunctionConstantRange - Return the range of values that corresponds to /// function-local constants. void getFunctionConstantRange(unsigned &Start, unsigned &End) const { @@ -124,6 +135,9 @@ public: const std::vector &getAttributes() const { return Attribute; } + const std::vector &getAttributeSets() const { + return AttributeSets; + } /// getGlobalBasicBlockID - This returns the function-specific ID for the /// specified basic block. This is relatively expensive information, so it -- cgit v1.1 From 0f715c26bd65516f616df94124679bad03084652 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 23:09:32 +0000 Subject: Add code for emitting the attribute groups. This is some initial code for emitting the attribute groups into the bitcode. NOTE: This format *may* change! Do not rely upon the attribute groups' bitcode not changing. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174845 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/BitcodeWriter.cpp | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'lib') diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 74bbaf2..8d43099 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -185,6 +185,54 @@ static uint64_t encodeLLVMAttributesForBitcode(AttributeSet Attrs, return EncodedAttrs; } +static void WriteAttributeGroupTable(const ValueEnumerator &VE, + BitstreamWriter &Stream) { + const std::vector &Attrs = VE.getAttributeSets(); + if (Attrs.empty()) return; + + Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); + + SmallVector Record; + for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { + AttributeSet AS = Attrs[i]; + for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { + AttributeSet A = AS.getSlotAttributes(i); + + Record.push_back(VE.getAttributeSetID(A)); + Record.push_back(AS.getSlotIndex(i)); + + for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0); + I != E; ++I) { + Attribute Attr = *I; + if (Attr.isEnumAttribute()) { + Record.push_back(0); + Record.push_back(Attr.getKindAsEnum()); + } else if (Attr.isAlignAttribute()) { + Record.push_back(1); + Record.push_back(Attr.getKindAsEnum()); + Record.push_back(Attr.getValueAsInt()); + } else { + StringRef Kind = Attr.getKindAsString(); + StringRef Val = Attr.getValueAsString(); + + Record.push_back(Val.empty() ? 3 : 4); + Record.append(Kind.begin(), Kind.end()); + Record.push_back(0); + if (!Val.empty()) { + Record.append(Val.begin(), Val.end()); + Record.push_back(0); + } + } + } + + Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); + Record.clear(); + } + } + + Stream.ExitBlock(); +} + static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &Attrs = VE.getAttributes(); @@ -192,6 +240,8 @@ static void WriteAttributeTable(const ValueEnumerator &VE, Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); + // FIXME: Remove this! It no longer works with the current attributes classes. + SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { const AttributeSet &A = Attrs[i]; @@ -1854,6 +1904,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); + // Emit information about attribute groups. + WriteAttributeGroupTable(VE, Stream); + // Emit information about parameter attributes. WriteAttributeTable(VE, Stream); -- cgit v1.1 From 3f87d23a3dc33afa8e9f6e8853fe4b9717c8cf8d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 23:15:51 +0000 Subject: Eat the alignment keyword if we're in an attribute group. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174846 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 9a76007..e2f42d8 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -878,6 +878,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // 2". 
unsigned Alignment; if (inAttrGrp) { + Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' here") || ParseUInt32(Alignment)) return true; @@ -891,6 +892,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_alignstack: { unsigned Alignment; if (inAttrGrp) { + Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' here") || ParseUInt32(Alignment)) return true; -- cgit v1.1 From 2153691a8ba35cb2bcf9237557b6cae7e9d8e68d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 23:18:05 +0000 Subject: The 'Raw' method cannot handle 'string' attributes. Don't even try. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174848 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index e64603c..343f569 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -468,6 +468,10 @@ uint64_t AttributeSetImpl::Raw(uint64_t Index) const { for (AttributeSetNode::const_iterator II = ASN->begin(), IE = ASN->end(); II != IE; ++II) { Attribute Attr = *II; + + // This cannot handle string attributes. + if (Attr.isStringAttribute()) continue; + Attribute::AttrKind Kind = Attr.getKindAsEnum(); if (Kind == Attribute::Alignment) -- cgit v1.1 From c3ba0a821b359060d76453023f3852954e6a5ee3 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Feb 2013 23:24:25 +0000 Subject: Add support in the bitcode reader to read the attribute groups. This reads the attribute groups. It currently doesn't do anything with them. NOTE: In the commit to the bitcode writer, the format *may* change in the near future. Which means that this code would also change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 78 ++++++++++++++++++++++++++++++++++++ lib/Bitcode/Reader/BitcodeReader.h | 4 ++ 2 files changed, 82 insertions(+) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 2c1e535..110f47c 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -498,6 +498,80 @@ bool BitcodeReader::ParseAttributeBlock() { } } +bool BitcodeReader::ParseAttributeGroupBlock() { + if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) + return Error("Malformed block record"); + + if (!MAttributeGroups.empty()) + return Error("Multiple PARAMATTR_GROUP blocks found!"); + + SmallVector Record; + + // Read all the records. + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("Error at end of PARAMATTR_GROUP block"); + case BitstreamEntry::EndBlock: + return false; + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] + if (Record.size() < 3) + return Error("Invalid ENTRY record"); + + // FIXME: Record[0] is the 'group ID'. What should we do with it here? + + uint64_t Idx = Record[1]; // Index of the object this attribute refers to. 
+ + AttrBuilder B; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Record[i] == 0) { // Enum attribute + B.addAttribute(Attribute::AttrKind(Record[++i])); + } else if (Record[i] == 1) { // Align attribute + if (Attribute::AttrKind(Record[++i]) == Attribute::Alignment) + B.addAlignmentAttr(Record[++i]); + else + B.addStackAlignmentAttr(Record[++i]); + } else { // String attribute + bool HasValue = (Record[i++] == 4); + SmallString<64> KindStr; + SmallString<64> ValStr; + + while (Record[i] != 0 && i != e) + KindStr += Record[i++]; + assert(Record[i] == 0 && "Kind string not terminated with 0"); + + if (HasValue) { + // Has a value associated with it. + ++i; // Skip the '0' that terminates the kind string. + while (Record[i] != 0 && i != e) + ValStr += Record[i++]; + assert(Record[i] == 0 && "Value string not terminated with 0"); + } + + B.addAttribute(KindStr.str(), ValStr.str()); + } + } + + MAttributeGroups.push_back(AttributeSet::get(Context, Idx, B)); + break; + } + } + } +} + bool BitcodeReader::ParseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return Error("Malformed block record"); @@ -1447,6 +1521,10 @@ bool BitcodeReader::ParseModule(bool Resume) { if (ParseAttributeBlock()) return true; break; + case bitc::PARAMATTR_GROUP_BLOCK_ID: + if (ParseAttributeGroupBlock()) + return true; + break; case bitc::TYPE_BLOCK_ID_NEW: if (ParseTypeTable()) return true; diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 3347418..8d36e67 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -148,6 +148,9 @@ class BitcodeReader : public GVMaterializer { /// are off by one. std::vector MAttributes; + /// \brief The set of attribute groups. + std::vector MAttributeGroups; + /// FunctionBBs - While parsing a function body, this is a list of the basic /// blocks for the function. std::vector FunctionBBs; @@ -320,6 +323,7 @@ private: bool ParseModule(bool Resume); bool ParseAttributeBlock(); + bool ParseAttributeGroupBlock(); bool ParseTypeTable(); bool ParseTypeTableBody(); -- cgit v1.1 From 36f54480f83d47404aceea5d41f8f6b95da2d00b Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 10 Feb 2013 23:29:49 +0000 Subject: Abstract the liveness checking in PHIElimination::SplitPHIEdges() to support both LiveVariables and LiveIntervals. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174850 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b952aab..bf2b95f 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -75,6 +75,11 @@ namespace { bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI); + // These functions are temporary abstractions around LiveVariables and + // LiveIntervals, so they can go away when LiveVariables does. + bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB); + typedef std::pair BBVRegPair; typedef DenseMap VRegPHIUse; @@ -557,7 +562,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. 
- if (!LV->isLiveOut(Reg, *PreMBB)) + if (!isLiveOutPastPHIs(Reg, PreMBB)) continue; DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" @@ -572,7 +577,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !LV->isLiveIn(Reg, MBB); + bool ShouldSplit = !isLiveIn(Reg, &MBB); // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { @@ -599,3 +604,33 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, } return Changed; } + +bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveIn() requires either LiveVariables or LiveIntervals"); + if (LIS) + return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB); + else + return LV->isLiveIn(Reg, *MBB); +} + +bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals"); + // LiveVariables considers uses in PHIs to be in the predecessor basic block, + // so that a register used only in a PHI is not live out of the block. In + // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than + // in the predecessor basic block, so that a register used only in a PHI is live + // out of the block. + if (LIS) { + const LiveInterval &LI = LIS->getInterval(Reg); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (LI.liveAt(LIS->getMBBStartIdx(*SI))) + return true; + } + return false; + } else { + return LV->isLiveOut(Reg, *MBB); + } +} -- cgit v1.1 From f5844a75154e73a2302767eeecf3b3401e157bb3 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 10 Feb 2013 23:29:54 +0000 Subject: Fix the unused but nearly correct method SlotIndexes::insertMBBInMaps() and add support for updating SlotIndexes to MachineBasicBlock::SplitCriticalEdge(). This calls renumberIndexes() every time; it should be improved to only renumber locally. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBasicBlock.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 7e9fb20..2534a74 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -662,6 +662,9 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { " BB#" << getNumber() << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); + SlotIndexes *Indexes = P->getAnalysisIfAvailable(); + if (Indexes) + Indexes->insertMBBInMaps(NMBB); // On some targets like Mips, branches may kill virtual registers. Make sure // that LiveVariables is properly updated after updateTerminator replaces the @@ -697,6 +700,17 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + + if (Indexes) { + for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); + I != E; ++I) { + // Some instructions may have been moved to NMBB by updateTerminator(), + // so we first remove any instruction that already has an index. 
+ if (Indexes->hasIndex(I)) + Indexes->removeMachineInstrFromMaps(I); + Indexes->insertMachineInstrInMaps(I); + } + } } // Fix PHI nodes in Succ so they refer to NMBB instead of this -- cgit v1.1 From 612779eb83a98cec1e11dc823ba2e6420edbce54 Mon Sep 17 00:00:00 2001 From: Joel Jones Date: Sun, 10 Feb 2013 23:56:30 +0000 Subject: Spelling correction git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 071b432..ff28dc1 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2691,7 +2691,7 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, return true; } -/// EXTR instruciton extracts a contiguous chunk of bits from two existing +/// EXTR instruction extracts a contiguous chunk of bits from two existing /// registers viewed as a high/low pair. This function looks for the pattern: /// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an /// EXTR. Can't quite be done in TableGen because the two immediates aren't -- cgit v1.1 From f12b379448a9f2131feba15c01714e44bedda120 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 11 Feb 2013 01:16:51 +0000 Subject: Fix unnecessary removal of const through cast machinery I have some uncommitted changes to the cast code that catch this sort of thing at compile-time but I still need to do some other cleanup before I can enable it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/BitcodeWriter.cpp | 8 ++++---- lib/IR/AsmWriter.cpp | 6 +++--- lib/MC/MCAssembler.cpp | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 8d43099..37dcb46 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1249,7 +1249,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Br: { Code = bitc::FUNC_CODE_INST_BR; - BranchInst &II = cast(I); + const BranchInst &II = cast(I); Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); @@ -1264,7 +1264,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, SmallVector Vals64; Code = bitc::FUNC_CODE_INST_SWITCH; - SwitchInst &SI = cast(I); + const SwitchInst &SI = cast(I); uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16); Vals64.push_back(SwitchRecordHeader); @@ -1273,9 +1273,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, pushValue64(SI.getCondition(), InstID, Vals64, VE); Vals64.push_back(VE.getValueID(SI.getDefaultDest())); Vals64.push_back(SI.getNumCases()); - for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - IntegersSubset& CaseRanges = i.getCaseValueEx(); + const IntegersSubset& CaseRanges = i.getCaseValueEx(); unsigned Code, Abbrev; // will unused. 
if (CaseRanges.isSingleNumber()) { diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index efa5978..d3736a1 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1758,7 +1758,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Special case conditional branches to swizzle the condition out to the front if (isa(I) && cast(I).isConditional()) { - BranchInst &BI(cast(I)); + const BranchInst &BI(cast(I)); Out << ' '; writeOperand(BI.getCondition(), true); Out << ", "; @@ -1767,14 +1767,14 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(BI.getSuccessor(1), true); } else if (isa(I)) { - SwitchInst& SI(cast(I)); + const SwitchInst& SI(cast(I)); // Special case switch instruction to get formatting nice and correct. Out << ' '; writeOperand(SI.getCondition(), true); Out << ", "; writeOperand(SI.getDefaultDest(), true); Out << " ["; - for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { Out << "\n "; writeOperand(i.getCaseValue(), true); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index fd281e6..1829266 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -421,7 +421,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, } case MCFragment::FT_Org: { - MCOrgFragment &OF = cast(F); + const MCOrgFragment &OF = cast(F); int64_t TargetLocation; if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout)) report_fatal_error("expected assembly-time absolute expression"); @@ -498,7 +498,7 @@ void MCAsmLayout::layoutFragment(MCFragment *F) { /// \brief Write the contents of a fragment to the given object writer. Expects /// a MCEncodedFragment. static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) { - MCEncodedFragment &EF = cast(F); + const MCEncodedFragment &EF = cast(F); OW->WriteBytes(EF.getContents()); } @@ -549,7 +549,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, switch (F.getKind()) { case MCFragment::FT_Align: { ++stats::EmittedAlignFragments; - MCAlignFragment &AF = cast(F); + const MCAlignFragment &AF = cast(F); uint64_t Count = FragmentSize / AF.getValueSize(); assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!"); @@ -604,7 +604,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, case MCFragment::FT_Fill: { ++stats::EmittedFillFragments; - MCFillFragment &FF = cast(F); + const MCFillFragment &FF = cast(F); assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!"); @@ -621,14 +621,14 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, } case MCFragment::FT_LEB: { - MCLEBFragment &LF = cast(F); + const MCLEBFragment &LF = cast(F); OW->WriteBytes(LF.getContents().str()); break; } case MCFragment::FT_Org: { ++stats::EmittedOrgFragments; - MCOrgFragment &OF = cast(F); + const MCOrgFragment &OF = cast(F); for (uint64_t i = 0, e = FragmentSize; i != e; ++i) OW->Write8(uint8_t(OF.getValue())); @@ -667,7 +667,7 @@ void MCAssembler::writeSectionData(const MCSectionData *SD, // Check that we aren't trying to write a non-zero contents (or fixups) // into a virtual section. This is to support clients which use standard // directives to fill the contents of virtual sections. 
- MCDataFragment &DF = cast(*it); + const MCDataFragment &DF = cast(*it); assert(DF.fixup_begin() == DF.fixup_end() && "Cannot have fixups in virtual section!"); for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i) -- cgit v1.1 From 5f352cc6e49d714bd5c14c952aa5c394fc9897bf Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 11 Feb 2013 01:18:26 +0000 Subject: Remove unnecessary code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174854 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 405e1e8..3a55696 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6591,10 +6591,6 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { const DataLayout *TD = TLI.getDataLayout(); SmallVector Ins; - // Check whether the function can return without sret-demotion. - SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); - if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; -- cgit v1.1 From 092e5e75661fdd5d54a748fb00fab59d21031268 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 11 Feb 2013 01:27:15 +0000 Subject: Currently, codegen may spent some time in SDISel passes even if an entire function is successfully handled by fast-isel. That's because function arguments are *always* handled by SDISel. Introduce FastLowerArguments to allow each target to provide hook to handle formal argument lowering. As a proof-of-concept, add ARMFastIsel::FastLowerArguments to handle functions with 4 or fewer scalar integer (i8, i16, or i32) arguments. It completely eliminates the need for SDISel for trivial functions. rdar://13163905 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174855 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/FastISel.cpp | 28 +++++++++- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 20 +++++--- lib/Target/ARM/ARMFastISel.cpp | 74 +++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 0d90a07..6c41e1b 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -87,6 +87,27 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } +bool FastISel::LowerArguments() { + if (!FuncInfo.CanLowerReturn) + // Fallback to SDISel argument lowering code to deal with sret pointer + // parameter. + return false; + + if (!FastLowerArguments()) + return false; + + // Enter non-dead arguments into ValueMap for uses in non-entry BBs. 
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), + E = FuncInfo.Fn->arg_end(); I != E; ++I) { + if (!I->use_empty()) { + DenseMap::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; + } + } + return true; +} + void FastISel::flushLocalValueMap() { LocalValueMap.clear(); LastLocalValue = EmitStartPt; @@ -836,7 +857,8 @@ FastISel::SelectInstruction(const Instruction *I) { void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction // in the block then emit it, otherwise we have the unconditional // fall-through case, which needs no instructions. @@ -1067,6 +1089,10 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, FastISel::~FastISel() {} +bool FastISel::FastLowerArguments() { + return false; +} + unsigned FastISel::FastEmit_(MVT, MVT, unsigned) { return 0; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index d4e9a50..c9289ad 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1032,10 +1032,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); - // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) - LowerArguments(LLVMBB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { FastIS->startNewBlock(); @@ -1043,9 +1039,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { - CurDAG->setRoot(SDB->getControlRoot()); - SDB->clear(); - CodeGenAndEmitDAG(); + // Lower any arguments needed in this block if this is the entry block. + if (!FastIS->LowerArguments()) { + // Call target indepedent SDISel argument lowering code if the target + // specific routine is not successful. + LowerArguments(LLVMBB); + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } // If we inserted any instructions at the beginning, make a note of // where they are, so we can be sure to emit subsequent instructions @@ -1156,6 +1158,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } FastIS->recomputeInsertPt(); + } else { + // Lower any arguments needed in this block if this is the entry block. 
+ if (LLVMBB == &Fn.getEntryBlock()) + LowerArguments(LLVMBB); } if (Begin != BI) diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4b7978a..022fe97 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -146,6 +146,7 @@ class ARMFastISel : public FastISel { virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); + virtual bool FastLowerArguments(); private: #include "ARMGenFastISel.inc" @@ -2884,6 +2885,79 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, return DestReg2; } +bool ARMFastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + switch (CC) { + default: + return false; + case CallingConv::Fast: + case CallingConv::C: + case CallingConv::ARM_AAPCS_VFP: + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + break; + } + + // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments + // which are passed in r0 - r3. + unsigned Idx = 1; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (Idx > 4) + return false; + + if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) + return false; + + Type *ArgTy = I->getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + switch (ArgVT.getSimpleVT().SimpleTy) { + case MVT::i8: + case MVT::i16: + case MVT::i32: + break; + default: + return false; + } + } + + + static const uint16_t GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + Idx = 0; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (I->use_empty()) + continue; + unsigned SrcReg = GPRArgRegs[Idx]; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(DstReg, getKillRegState(true)); + UpdateValueMap(I, ResultReg); + } + + return true; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { -- cgit v1.1 From da20ea696d8b24d89ae157106ddad2337296ed50 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 05:29:41 +0000 Subject: BBVectorize: Make the bookkeeping to support full cycle checking less expensive By itself, this does not have much of an effect, but only because in the default configuration the full cycle checks are used only for small problem sizes. This is part of a general cleanup of uses of iteration over std::multimap ranges only for the purpose of checking membership. No functionality change intended. 
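For readers unfamiliar with the pattern being cleaned up, the following stand-alone sketch (illustrative only, not part of this patch) contrasts the two membership checks; std::unordered_set stands in for llvm::DenseSet and plain ints for the Value pairs so that it compiles without the LLVM headers:

// Sketch: why a paired hash set beats scanning a multimap range when the
// query is only "is (A, B) present?". Build with: g++ -std=c++11 sketch.cpp
#include <functional>
#include <iostream>
#include <map>
#include <unordered_set>
#include <utility>

typedef std::pair<int, int> ValuePair;

struct PairHash {
  size_t operator()(const ValuePair &P) const {
    return std::hash<int>()(P.first) * 31u + std::hash<int>()(P.second);
  }
};

// Before: O(k) walk over every value mapped to P.first.
bool isSecondInIteratorPair(const std::multimap<int, int> &M, ValuePair P) {
  typedef std::multimap<int, int>::const_iterator Iter;
  std::pair<Iter, Iter> Range = M.equal_range(P.first);
  for (Iter I = Range.first; I != Range.second; ++I)
    if (I->second == P.second)
      return true;
  return false;
}

// After: O(1) expected lookup in a set maintained alongside the multimap.
bool isInPairSet(const std::unordered_set<ValuePair, PairHash> &S,
                 ValuePair P) {
  return S.count(P) != 0;
}

int main() {
  std::multimap<int, int> Pairs;
  std::unordered_set<ValuePair, PairHash> PairSet;
  // The bookkeeping cost: every insertion goes into both structures.
  ValuePair Data[] = { ValuePair(1, 2), ValuePair(1, 3), ValuePair(4, 5) };
  for (unsigned i = 0; i != 3; ++i) {
    Pairs.insert(Data[i]);
    PairSet.insert(Data[i]);
  }
  std::cout << isSecondInIteratorPair(Pairs, ValuePair(1, 3)) << '\n'; // 1
  std::cout << isInPairSet(PairSet, ValuePair(1, 3)) << '\n';          // 1
  std::cout << isInPairSet(PairSet, ValuePair(2, 1)) << '\n';          // 0
  return 0;
}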
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174856 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 39 ++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index ec10b42..824494d 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -288,7 +288,8 @@ namespace { bool pairsConflict(ValuePair P, ValuePair Q, DenseSet &PairableInstUsers, - std::multimap *PairableInstUserMap = 0); + std::multimap *PairableInstUserMap = 0, + DenseSet *PairableInstUserPairSet = 0); bool pairWillFormCycle(ValuePair P, std::multimap &PairableInstUsers, @@ -300,6 +301,7 @@ namespace { std::multimap &ConnectedPairs, DenseSet &PairableInstUsers, std::multimap &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, DenseMap &Tree, DenseSet &PrunedTree, ValuePair J, @@ -323,6 +325,7 @@ namespace { std::multimap &ConnectedPairDeps, DenseSet &PairableInstUsers, std::multimap &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, DenseSet &BestTree, size_t &BestMaxDepth, int &BestEffSize, VPIteratorPair ChoiceRange, @@ -1401,7 +1404,8 @@ namespace { // two pairs cannot be simultaneously fused. bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, DenseSet &PairableInstUsers, - std::multimap *PairableInstUserMap) { + std::multimap *PairableInstUserMap, + DenseSet *PairableInstUserPairSet) { // Two pairs are in conflict if they are mutual Users of eachother. bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || PairableInstUsers.count(ValuePair(P.first, Q.second)) || @@ -1417,13 +1421,11 @@ namespace { // profiling and probably a different data structure (same is true of // most uses of std::multimap). if (PUsesQ) { - VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q); - if (!isSecondInIteratorPair(P, QPairRange)) + if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) PairableInstUserMap->insert(VPPair(Q, P)); } if (QUsesP) { - VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P); - if (!isSecondInIteratorPair(Q, PPairRange)) + if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) PairableInstUserMap->insert(VPPair(P, Q)); } } @@ -1534,6 +1536,7 @@ namespace { std::multimap &ConnectedPairs, DenseSet &PairableInstUsers, std::multimap &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, DenseMap &Tree, DenseSet &PrunedTree, ValuePair J, @@ -1586,7 +1589,8 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { if (C2->second >= C->second) { CanAdd = false; break; @@ -1606,7 +1610,8 @@ namespace { T->second == C->first.first || T->second == C->first.second || pairsConflict(*T, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { CanAdd = false; break; } @@ -1623,7 +1628,8 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? 
&PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { CanAdd = false; break; } @@ -1638,7 +1644,8 @@ namespace { ChosenPairs.begin(), E2 = ChosenPairs.end(); C2 != E2; ++C2) { if (pairsConflict(*C2, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { CanAdd = false; break; } @@ -1699,6 +1706,7 @@ namespace { std::multimap &ConnectedPairDeps, DenseSet &PairableInstUsers, std::multimap &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, DenseSet &BestTree, size_t &BestMaxDepth, int &BestEffSize, VPIteratorPair ChoiceRange, @@ -1714,7 +1722,8 @@ namespace { for (DenseMap::iterator C = ChosenPairs.begin(), E = ChosenPairs.end(); C != E; ++C) { if (pairsConflict(*C, *J, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { DoesConflict = true; break; } @@ -1748,8 +1757,8 @@ namespace { DenseSet PrunedTree; pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, - PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, - PrunedTree, *J, UseCycleCheck); + PairableInstUsers, PairableInstUserMap, PairableInstUserPairSet, + ChosenPairs, Tree, PrunedTree, *J, UseCycleCheck); int EffSize = 0; if (TTI) { @@ -2075,6 +2084,7 @@ namespace { bool UseCycleCheck = CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; std::multimap PairableInstUserMap; + DenseSet PairableInstUserPairSet; for (std::vector::iterator I = PairableInsts.begin(), E = PairableInsts.end(); I != E; ++I) { // The number of possible pairings for this variable: @@ -2090,7 +2100,8 @@ namespace { findBestTreeFor(CandidatePairs, CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, - PairableInstUsers, PairableInstUserMap, ChosenPairs, + PairableInstUsers, PairableInstUserMap, + PairableInstUserPairSet, ChosenPairs, BestTree, BestMaxDepth, BestEffSize, ChoiceRange, UseCycleCheck); -- cgit v1.1 From 3fc1e4aa159ec15058bb26acbec39f6e09990207 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 05:29:48 +0000 Subject: BBVectorize: isa/cast cleanup in getInstructionTypes Profiling suggests that getInstructionTypes is performance-sensitive, this cleans up some double-casting in that function in favor of using dyn_cast. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174857 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 824494d..682e992 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -466,18 +466,18 @@ namespace { static inline void getInstructionTypes(Instruction *I, Type *&T1, Type *&T2) { - if (isa(I)) { + if (StoreInst *SI = dyn_cast(I)) { // For stores, it is the value type, not the pointer type that matters // because the value is what will come from a vector register. 
- Value *IVal = cast(I)->getValueOperand(); + Value *IVal = SI->getValueOperand(); T1 = IVal->getType(); } else { T1 = I->getType(); } - if (I->isCast()) - T2 = cast(I)->getSrcTy(); + if (CastInst *CI = dyn_cast(I)) + T2 = CI->getSrcTy(); else T2 = T1; -- cgit v1.1 From 2f0e63cc16feb39480805bd00f53bbe5e3031d29 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 05:29:49 +0000 Subject: BBVectorize: Avoid linear searches within the load-move set This is another cleanup aimed at eliminating linear searches in ranges of std::multimap. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174858 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 50 +++++++++++++++++++------------- 1 file changed, 30 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 682e992..0d3a444 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -277,7 +277,7 @@ namespace { bool trackUsesOfI(DenseSet &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers = true, - std::multimap *LoadMoveSet = 0); + DenseSet *LoadMoveSetPairs = 0); void computePairsConnectedTo( std::multimap &CandidatePairs, @@ -362,19 +362,21 @@ namespace { void collectPairLoadMoveSet(BasicBlock &BB, DenseMap &ChosenPairs, std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs, Instruction *I); void collectLoadMoveSet(BasicBlock &BB, std::vector &PairableInsts, DenseMap &ChosenPairs, - std::multimap &LoadMoveSet); + std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs); bool canMoveUsesOfIAfterJ(BasicBlock &BB, - std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs, Instruction *I, Instruction *J); void moveUsesOfIAfterJ(BasicBlock &BB, - std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs, Instruction *&InsertionPt, Instruction *I, Instruction *J); @@ -1114,7 +1116,7 @@ namespace { bool BBVectorize::trackUsesOfI(DenseSet &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers, - std::multimap *LoadMoveSet) { + DenseSet *LoadMoveSetPairs) { bool UsesI = false; // This instruction may already be marked as a user due, for example, to @@ -1132,9 +1134,8 @@ namespace { } } if (!UsesI && J->mayReadFromMemory()) { - if (LoadMoveSet) { - VPIteratorPair JPairRange = LoadMoveSet->equal_range(J); - UsesI = isSecondInIteratorPair(I, JPairRange); + if (LoadMoveSetPairs) { + UsesI = LoadMoveSetPairs->count(ValuePair(J, I)); } else { for (AliasSetTracker::iterator W = WriteSet.begin(), WE = WriteSet.end(); W != WE; ++W) { @@ -2737,7 +2738,7 @@ namespace { // Move all uses of the function I (including pairing-induced uses) after J. bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, - std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs, Instruction *I, Instruction *J) { // Skip to the first instruction past I. BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); @@ -2745,18 +2746,18 @@ namespace { DenseSet Users; AliasSetTracker WriteSet(*AA); for (; cast(L) != J; ++L) - (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet); + (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); assert(cast(L) == J && "Tracking has not proceeded far enough to check for dependencies"); // If J is now in the use set of I, then trackUsesOfI will return true // and we have a dependency cycle (and the fusing operation must abort). 
- return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet); + return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs); } // Move all uses of the function I (including pairing-induced uses) after J. void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, - std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs, Instruction *&InsertionPt, Instruction *I, Instruction *J) { // Skip to the first instruction past I. @@ -2765,7 +2766,7 @@ namespace { DenseSet Users; AliasSetTracker WriteSet(*AA); for (; cast(L) != J;) { - if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) { + if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { // Move this instruction Instruction *InstToMove = L; ++L; @@ -2786,6 +2787,7 @@ namespace { void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, DenseMap &ChosenPairs, std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs, Instruction *I) { // Skip to the first instruction past I. BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); @@ -2798,8 +2800,10 @@ namespace { // could be before I if this is an inverted input. for (BasicBlock::iterator E = BB.end(); cast(L) != E; ++L) { if (trackUsesOfI(Users, WriteSet, I, L)) { - if (L->mayReadFromMemory()) + if (L->mayReadFromMemory()) { LoadMoveSet.insert(ValuePair(L, I)); + LoadMoveSetPairs.insert(ValuePair(L, I)); + } } } } @@ -2814,14 +2818,16 @@ namespace { void BBVectorize::collectLoadMoveSet(BasicBlock &BB, std::vector &PairableInsts, DenseMap &ChosenPairs, - std::multimap &LoadMoveSet) { + std::multimap &LoadMoveSet, + DenseSet &LoadMoveSetPairs) { for (std::vector::iterator PI = PairableInsts.begin(), PIE = PairableInsts.end(); PI != PIE; ++PI) { DenseMap::iterator P = ChosenPairs.find(*PI); if (P == ChosenPairs.end()) continue; Instruction *I = cast(P->first); - collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I); + collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, + LoadMoveSetPairs, I); } } @@ -2877,7 +2883,9 @@ namespace { ChosenPairs.insert(*P); std::multimap LoadMoveSet; - collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); + DenseSet LoadMoveSetPairs; + collectLoadMoveSet(BB, PairableInsts, ChosenPairs, + LoadMoveSet, LoadMoveSetPairs); DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); @@ -2909,7 +2917,7 @@ namespace { ChosenPairs.erase(FP); ChosenPairs.erase(P); - if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) { + if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) { DEBUG(dbgs() << "BBV: fusion of: " << *I << " <-> " << *J << " aborted because of non-trivial dependency cycle\n"); @@ -3010,7 +3018,7 @@ namespace { // first instruction is disjoint from the input tree of the second // (by definition), and so commutes with it. - moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J); + moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J); if (!isa(I)) { L->replaceAllUsesWith(K1); @@ -3036,8 +3044,10 @@ namespace { N != JPairRange.second; ++N) NewSetMembers.push_back(ValuePair(K, N->second)); for (std::vector::iterator A = NewSetMembers.begin(), - AE = NewSetMembers.end(); A != AE; ++A) + AE = NewSetMembers.end(); A != AE; ++A) { LoadMoveSet.insert(*A); + LoadMoveSetPairs.insert(*A); + } } // Before removing I, set the iterator to the next instruction. 
-- cgit v1.1 From 00f63b1b84d059a1ffa572e76708e03750a9e523 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 05:29:51 +0000 Subject: BBVectorize: Remove the linear searches from pair connection searching This removes the last of the linear searches over ranges of std::multimap iterators, giving a 7% speedup on the doduc.bc input from PR15222. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174859 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 35 ++++++++++---------------------- 1 file changed, 11 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 0d3a444..9da4c37 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -281,6 +281,7 @@ namespace { void computePairsConnectedTo( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes, @@ -666,19 +667,6 @@ namespace { } } - // Returns true if J is the second element in some pair referenced by - // some multimap pair iterator pair. - template - bool isSecondInIteratorPair(V J, std::pair< - typename std::multimap::iterator, - typename std::multimap::iterator> PairRange) { - for (typename std::multimap::iterator K = PairRange.first; - K != PairRange.second; ++K) - if (K->second == J) return true; - - return false; - } - bool isPureIEChain(InsertElementInst *IE) { InsertElementInst *IENext = IE; do { @@ -1253,6 +1241,7 @@ namespace { // output of PI or PJ. void BBVectorize::computePairsConnectedTo( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes, @@ -1274,8 +1263,6 @@ namespace { continue; } - VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); - // For each use of the first variable, look for uses of the second // variable... 
for (Value::use_iterator J = P.second->use_begin(), @@ -1284,17 +1271,15 @@ namespace { P.second == SJ->getPointerOperand()) continue; - VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); - // Look for : - if (isSecondInIteratorPair(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); ConnectedPairs.insert(VP); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); } // Look for : - if (isSecondInIteratorPair(*I, JPairRange)) { + if (CandidatePairsSet.count(ValuePair(*J, *I))) { VPPair VP(P, ValuePair(*J, *I)); ConnectedPairs.insert(VP); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); @@ -1309,7 +1294,7 @@ namespace { P.first == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); ConnectedPairs.insert(VP); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); @@ -1328,14 +1313,12 @@ namespace { P.second == SI->getPointerOperand()) continue; - VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); - for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { if ((SJ = dyn_cast(*J)) && P.second == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); ConnectedPairs.insert(VP); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); @@ -1352,6 +1335,10 @@ namespace { std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes) { + DenseSet CandidatePairsSet; + for (std::multimap::iterator I = CandidatePairs.begin(), + E = CandidatePairs.end(); I != E; ++I) + CandidatePairsSet.insert(*I); for (std::vector::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { @@ -1359,7 +1346,7 @@ namespace { for (std::multimap::iterator P = choiceRange.first; P != choiceRange.second; ++P) - computePairsConnectedTo(CandidatePairs, PairableInsts, + computePairsConnectedTo(CandidatePairs, CandidatePairsSet, PairableInsts, ConnectedPairs, PairConnectionTypes, *P); } -- cgit v1.1 From f64c889cc94417322b0ff8ad1c61939183bf3c38 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Mon, 11 Feb 2013 05:37:07 +0000 Subject: Revert "Rename LLVMContext diagnostic handler types and functions." This reverts my commit 171047. Now that I've removed my misguided attempt to support backend warnings, these diagnostics are only about inline assembly. It would take quite a bit more work to generalize them properly, so I'm just reverting this. 
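As a usage reference for the restored interface, here is a minimal sketch of how a client installs an inline-asm diagnostic handler; it assumes the headers in the tree as of this revision, and the handler name and message text are made up for illustration:

// Sketch: installing an inline-asm diagnostic handler on an LLVMContext so
// that emitError() reports through it instead of printing and exiting.
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void handleInlineAsmDiag(const SMDiagnostic &Diag, void *Context,
                                unsigned LocCookie) {
  // LocCookie ties the diagnostic back to the !srcloc metadata on the
  // offending inline-asm call; 0 means no location was recorded.
  errs() << "inline asm diagnostic (cookie " << LocCookie << "): "
         << Diag.getMessage() << "\n";
}

int main() {
  LLVMContext Ctx;
  Ctx.setInlineAsmDiagnosticHandler(handleInlineAsmDiag, /*DiagContext=*/0);
  // With a handler installed the context reports the error and keeps going;
  // without one, emitError() would print to stderr and call exit(1).
  Ctx.emitError(/*LocCookie=*/0, "example diagnostic");
  return 0;
}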
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174860 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 10 ++++----- lib/IR/LLVMContext.cpp | 30 ++++++++++++++------------ lib/IR/LLVMContextImpl.cpp | 4 ++-- lib/IR/LLVMContextImpl.h | 4 ++-- 4 files changed, 25 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index d5608c3..abfa330 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -38,7 +38,7 @@ using namespace llvm; namespace { struct SrcMgrDiagInfo { const MDNode *LocInfo; - LLVMContext::DiagHandlerTy DiagHandler; + LLVMContext::InlineAsmDiagHandlerTy DiagHandler; void *DiagContext; }; } @@ -88,15 +88,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, SourceMgr SrcMgr; SrcMgrDiagInfo DiagInfo; - // If the current LLVMContext has a diagnostic handler, set it in SourceMgr. + // If the current LLVMContext has an inline asm handler, set it in SourceMgr. LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; - if (LLVMCtx.getDiagnosticHandler() != 0) { + if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { // If the source manager has an issue, we arrange for srcMgrDiagHandler // to be invoked, getting DiagInfo passed into it. DiagInfo.LocInfo = LocMDNode; - DiagInfo.DiagHandler = LLVMCtx.getDiagnosticHandler(); - DiagInfo.DiagContext = LLVMCtx.getDiagnosticContext(); + DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); + DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo); HasDiagHandler = true; } diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 774c591..8e2bbb7 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -73,22 +73,24 @@ void LLVMContext::removeModule(Module *M) { // Recoverable Backend Errors //===----------------------------------------------------------------------===// -void LLVMContext::setDiagnosticHandler(DiagHandlerTy DiagHandler, - void *DiagContext) { - pImpl->DiagHandler = DiagHandler; - pImpl->DiagContext = DiagContext; +void LLVMContext:: +setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler, + void *DiagContext) { + pImpl->InlineAsmDiagHandler = DiagHandler; + pImpl->InlineAsmDiagContext = DiagContext; } -/// getDiagnosticHandler - Return the diagnostic handler set by -/// setDiagnosticHandler. -LLVMContext::DiagHandlerTy LLVMContext::getDiagnosticHandler() const { - return pImpl->DiagHandler; +/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by +/// setInlineAsmDiagnosticHandler. +LLVMContext::InlineAsmDiagHandlerTy +LLVMContext::getInlineAsmDiagnosticHandler() const { + return pImpl->InlineAsmDiagHandler; } -/// getDiagnosticContext - Return the diagnostic context set by -/// setDiagnosticHandler. -void *LLVMContext::getDiagnosticContext() const { - return pImpl->DiagContext; +/// getInlineAsmDiagnosticContext - Return the diagnostic context set by +/// setInlineAsmDiagnosticHandler. 
+void *LLVMContext::getInlineAsmDiagnosticContext() const { + return pImpl->InlineAsmDiagContext; } void LLVMContext::emitError(const Twine &ErrorStr) { @@ -107,7 +109,7 @@ void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) { void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { // If there is no error handler installed, just print the error and exit. - if (pImpl->DiagHandler == 0) { + if (pImpl->InlineAsmDiagHandler == 0) { errs() << "error: " << ErrorStr << "\n"; exit(1); } @@ -115,7 +117,7 @@ void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { // If we do have an error handler, we can report the error and keep going. SMDiagnostic Diag("", SourceMgr::DK_Error, ErrorStr.str()); - pImpl->DiagHandler(Diag, pImpl->DiagContext, LocCookie); + pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie); } //===----------------------------------------------------------------------===// diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 89e163f..6a6a4d6 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -35,8 +35,8 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64) { - DiagHandler = 0; - DiagContext = 0; + InlineAsmDiagHandler = 0; + InlineAsmDiagContext = 0; NamedStructTypesUniqueID = 0; } diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index cc7ca5e..7353dc0 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -236,8 +236,8 @@ public: /// will be automatically deleted if this context is deleted. SmallPtrSet OwnedModules; - LLVMContext::DiagHandlerTy DiagHandler; - void *DiagContext; + LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler; + void *InlineAsmDiagContext; typedef DenseMap IntMapTy; -- cgit v1.1 From ab39afa9d9b99c61842c8e3d0eb706bd16efdcf3 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Mon, 11 Feb 2013 08:13:54 +0000 Subject: [tsan/msan] adding thread_safety and uninitialized_checks attributes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174864 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 2 ++ lib/AsmParser/LLParser.cpp | 6 +++++- lib/AsmParser/LLToken.h | 2 ++ lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +- lib/IR/Attributes.cpp | 6 ++++++ lib/IR/Verifier.cpp | 2 ++ 7 files changed, 19 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 2256124..3b8b033 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -578,6 +578,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ssp); KEYWORD(sspreq); KEYWORD(sspstrong); + KEYWORD(thread_safety); + KEYWORD(uninitialized_checks); KEYWORD(uwtable); KEYWORD(zeroext); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index e2f42d8..e4f8d1f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -922,6 +922,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; + case lltok::kw_thread_safety: B.addAttribute(Attribute::ThreadSafety); break; + case lltok::kw_uninitialized_checks: B.addAttribute(Attribute::UninitializedChecks); break; case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; // 
Error handling. @@ -1161,7 +1163,8 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_noredzone: case lltok::kw_noimplicitfloat: case lltok::kw_naked: case lltok::kw_nonlazybind: case lltok::kw_address_safety: case lltok::kw_minsize: - case lltok::kw_alignstack: + case lltok::kw_alignstack: case lltok::kw_thread_safety: + case lltok::kw_uninitialized_checks: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -1203,6 +1206,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_nonlazybind: case lltok::kw_address_safety: case lltok::kw_minsize: case lltok::kw_alignstack: case lltok::kw_align: case lltok::kw_noduplicate: + case lltok::kw_thread_safety: case lltok::kw_uninitialized_checks: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 8c18a3b..97429b8 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -119,6 +119,8 @@ namespace lltok { kw_sspreq, kw_sspstrong, kw_sret, + kw_thread_safety, + kw_uninitialized_checks, kw_uwtable, kw_zeroext, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 110f47c..30ba85e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -444,7 +444,7 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B, if (Alignment) B.addAlignmentAttr(Alignment); - B.addRawValue(((EncodedAttrs & (0xffffULL << 32)) >> 11) | + B.addRawValue(((EncodedAttrs & (0xfffffULL << 32)) >> 11) | (EncodedAttrs & 0xffff)); } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 37dcb46..65c3f73 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -181,7 +181,7 @@ static uint64_t encodeLLVMAttributesForBitcode(AttributeSet Attrs, uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; if (Attrs.hasAttribute(Index, Attribute::Alignment)) EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; - EncodedAttrs |= (Attrs.Raw(Index) & (0xffffULL << 21)) << 11; + EncodedAttrs |= (Attrs.Raw(Index) & (0xfffffULL << 21)) << 11; return EncodedAttrs; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 343f569..267c1aa 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -205,6 +205,10 @@ std::string Attribute::getAsString() const { return "sspstrong"; if (hasAttribute(Attribute::StructRet)) return "sret"; + if (hasAttribute(Attribute::ThreadSafety)) + return "thread_safety"; + if (hasAttribute(Attribute::UninitializedChecks)) + return "uninitialized_checks"; if (hasAttribute(Attribute::UWTable)) return "uwtable"; if (hasAttribute(Attribute::ZExt)) @@ -382,6 +386,8 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::MinSize: return 1ULL << 33; case Attribute::NoDuplicate: return 1ULL << 34; case Attribute::StackProtectStrong: return 1ULL << 35; + case Attribute::ThreadSafety: return 1ULL << 36; + case Attribute::UninitializedChecks: return 1ULL << 37; } llvm_unreachable("Unsupported attribute type"); } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 31312dc..02c2096 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -651,6 +651,8 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) && !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) && !Attrs.hasAttribute(Idx, 
Attribute::AddressSafety) && + !Attrs.hasAttribute(Idx, Attribute::ThreadSafety) && + !Attrs.hasAttribute(Idx, Attribute::UninitializedChecks) && !Attrs.hasAttribute(Idx, Attribute::MinSize), "Some attributes in '" + Attrs.getAsString(Idx) + "' only apply to functions!", V); -- cgit v1.1 From b29ce26ea60f7516c853318ffbfc107fde9ad897 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 11 Feb 2013 08:43:33 +0000 Subject: Add support for printing out the attribute groups. This emits the attribute groups that are used by the functions. (It currently doesn't print out return type or parameter attributes within attribute groups.) Note: The functions still retrieve their attributes from the "old" bitcode format (using the deprecated 'Raw()' method). This means that string attributes within an attribute group will not show up during a disassembly. This will be addressed in a future commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174867 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 72 +++++++++++++++++++++++++++++++++++++++++++++++--- lib/IR/AttributeImpl.h | 2 +- lib/IR/Attributes.cpp | 31 +++++++++++++--------- 3 files changed, 88 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index d3736a1..bf893e8 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -347,6 +347,10 @@ private: /// mdnMap - Map for MDNodes. DenseMap mdnMap; unsigned mdnNext; + + /// asMap - The slot map for attribute sets. + DenseMap asMap; + unsigned asNext; public: /// Construct from a module explicit SlotTracker(const Module *M); @@ -358,6 +362,7 @@ public: int getLocalSlot(const Value *V); int getGlobalSlot(const GlobalValue *V); int getMetadataSlot(const MDNode *N); + int getAttributeGroupSlot(AttributeSet AS); /// If you'd like to deal with a function instead of just a module, use /// this method to get its data into the SlotTracker. @@ -378,6 +383,13 @@ public: unsigned mdn_size() const { return mdnMap.size(); } bool mdn_empty() const { return mdnMap.empty(); } + /// AttributeSet map iterators. + typedef DenseMap::iterator as_iterator; + as_iterator as_begin() { return asMap.begin(); } + as_iterator as_end() { return asMap.end(); } + unsigned as_size() const { return asMap.size(); } + bool as_empty() const { return asMap.empty(); } + /// This function does the actual initialization. inline void initialize(); @@ -392,6 +404,9 @@ private: /// CreateFunctionSlot - Insert the specified Value* into the slot table. void CreateFunctionSlot(const Value *V); + /// \brief Insert the specified AttributeSet into the slot table. + void CreateAttributeSetSlot(AttributeSet AS); + /// Add all of the module level global variables (and their initializers) /// and function declarations, but not the contents of those functions. void processModule(); @@ -446,14 +461,14 @@ static SlotTracker *createSlotTracker(const Value *V) { // to be added to the slot table. SlotTracker::SlotTracker(const Module *M) : TheModule(M), TheFunction(0), FunctionProcessed(false), - mNext(0), fNext(0), mdnNext(0) { + mNext(0), fNext(0), mdnNext(0), asNext(0) { } // Function level constructor. Causes the contents of the Module and the one // function provided to be added to the slot table. SlotTracker::SlotTracker(const Function *F) : TheModule(F ? 
F->getParent() : 0), TheFunction(F), FunctionProcessed(false), - mNext(0), fNext(0), mdnNext(0) { + mNext(0), fNext(0), mdnNext(0), asNext(0) { } inline void SlotTracker::initialize() { @@ -487,12 +502,18 @@ void SlotTracker::processModule() { CreateMetadataSlot(NMD->getOperand(i)); } - // Add all the unnamed functions to the table. for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) + I != E; ++I) { if (!I->hasName()) + // Add all the unnamed functions to the table. CreateModuleSlot(I); + // Add all the function attributes to the table. + AttributeSet FnAttrs = I->getAttributes().getFnAttributes(); + if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex)) + CreateAttributeSetSlot(FnAttrs); + } + ST_DEBUG("end processModule!\n"); } @@ -589,6 +610,14 @@ int SlotTracker::getLocalSlot(const Value *V) { return FI == fMap.end() ? -1 : (int)FI->second; } +int SlotTracker::getAttributeGroupSlot(AttributeSet AS) { + // Check for uninitialized state and do lazy initialization. + initialize(); + + // Find the AttributeSet in the module map. + as_iterator AI = asMap.find(AS); + return AI == asMap.end() ? -1 : (int)AI->second; +} /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. void SlotTracker::CreateModuleSlot(const GlobalValue *V) { @@ -640,6 +669,18 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) { CreateMetadataSlot(Op); } +void SlotTracker::CreateAttributeSetSlot(AttributeSet AS) { + assert(AS.hasAttributes(AttributeSet::FunctionIndex) && + "Doesn't need a slot!"); + + as_iterator I = asMap.find(AS); + if (I != asMap.end()) + return; + + unsigned DestSlot = asNext++; + asMap[AS] = DestSlot; +} + //===----------------------------------------------------------------------===// // AsmWriter Implementation //===----------------------------------------------------------------------===// @@ -1201,6 +1242,7 @@ public: void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); void writeAllMDNodes(); + void writeAllAttributeGroups(); void printTypeIdentities(); void printGlobal(const GlobalVariable *GV); @@ -1268,6 +1310,8 @@ void AssemblyWriter::writeParamOperand(const Value *Operand, } void AssemblyWriter::printModule(const Module *M) { + Machine.initialize(); + if (!M->getModuleIdentifier().empty() && // Don't print the ID if it will start a new line (which would // require a comment char before it). @@ -1322,6 +1366,12 @@ void AssemblyWriter::printModule(const Module *M) { for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) printFunction(I); + // Output all attribute groups. + if (!Machine.as_empty()) { + Out << '\n'; + writeAllAttributeGroups(); + } + // Output named metadata. 
if (!M->named_metadata_empty()) Out << '\n'; @@ -2063,6 +2113,20 @@ void AssemblyWriter::printMDNodeBody(const MDNode *Node) { Out << "\n"; } +void AssemblyWriter::writeAllAttributeGroups() { + std::vector > asVec; + asVec.resize(Machine.as_size()); + + for (SlotTracker::as_iterator I = Machine.as_begin(), E = Machine.as_end(); + I != E; ++I) + asVec[I->second] = *I; + + for (std::vector >::iterator + I = asVec.begin(), E = asVec.end(); I != E; ++I) + Out << "attributes #" << I->second << " = { " + << I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n"; +} + //===----------------------------------------------------------------------===// // External Interface declarations //===----------------------------------------------------------------------===// diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 84f472d..7bb1fcc 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -174,7 +174,7 @@ public: unsigned getAlignment() const; unsigned getStackAlignment() const; - std::string getAsString() const; + std::string getAsString(bool InAttrGrp) const; typedef SmallVectorImpl::iterator iterator; typedef SmallVectorImpl::const_iterator const_iterator; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 267c1aa..d338d65 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -150,7 +150,7 @@ unsigned Attribute::getStackAlignment() const { return pImpl->getValueAsInt(); } -std::string Attribute::getAsString() const { +std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return ""; if (hasAttribute(Attribute::AddressSafety)) @@ -221,15 +221,23 @@ std::string Attribute::getAsString() const { // if (hasAttribute(Attribute::Alignment)) { std::string Result; - Result += "align "; + Result += "align"; + Result += (InAttrGrp) ? "=" : " "; Result += utostr(getValueAsInt()); return Result; } + if (hasAttribute(Attribute::StackAlignment)) { std::string Result; - Result += "alignstack("; - Result += utostr(getValueAsInt()); - Result += ")"; + Result += "alignstack"; + if (InAttrGrp) { + Result += "="; + Result += utostr(getValueAsInt()); + } else { + Result += "("; + Result += utostr(getValueAsInt()); + Result += ")"; + } return Result; } @@ -237,7 +245,6 @@ std::string Attribute::getAsString() const { // // "kind" // "kind" = "value" - // "kind" = ( "value1" "value2" "value3" ) // if (isStringAttribute()) { std::string Result; @@ -246,8 +253,7 @@ std::string Attribute::getAsString() const { StringRef Val = pImpl->getValueAsString(); if (Val.empty()) return Result; - Result += " = "; - Result += '\"' + Val.str() + '"'; + Result += "=\"" + Val.str() + '"'; return Result; } @@ -451,11 +457,11 @@ unsigned AttributeSetNode::getStackAlignment() const { return 0; } -std::string AttributeSetNode::getAsString() const { +std::string AttributeSetNode::getAsString(bool InAttrGrp) const { std::string Str = ""; for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ) { - Str += I->getAsString(); + Str += I->getAsString(InAttrGrp); if (++I != E) Str += " "; } return Str; @@ -783,9 +789,10 @@ unsigned AttributeSet::getStackAlignment(unsigned Index) const { return ASN ? ASN->getStackAlignment() : 0; } -std::string AttributeSet::getAsString(unsigned Index) const { +std::string AttributeSet::getAsString(unsigned Index, + bool InAttrGrp) const { AttributeSetNode *ASN = getAttributes(Index); - return ASN ? ASN->getAsString() : std::string(""); + return ASN ? 
+ return ASN ? ASN->getAsString(InAttrGrp) : std::string("");
}

/// \brief The attributes for the specified index are returned.
-- cgit v1.1

From cbe3f5e1622b5f809bc04d61da125801e4658a73 Mon Sep 17 00:00:00 2001
From: Cameron Zwarich
Date: Mon, 11 Feb 2013 09:24:45 +0000
Subject: Update SlotIndexes after updateTerminator() possibly removes instructions.

I am really trying to avoid piping SlotIndexes through to RemoveBranch() and friends.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174869 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/CodeGen/MachineBasicBlock.cpp | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
(limited to 'lib')
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 2534a74..7457cd5 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -693,8 +693,32 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
 }
 ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ Terminators.push_back(I);
+ }
+
 updateTerminator();
+
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ NewTerminators.push_back(I);
+
+ for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
+ E = Terminators.end(); I != E; ++I) {
+ if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
+ NewTerminators.end())
+ Indexes->removeMachineInstrFromMaps(*I);
+ }
+ }
+
 // Insert unconditional "jump Succ" instruction in NMBB if necessary.
 NMBB->addSuccessor(Succ);
 if (!NMBB->isLayoutSuccessor(Succ)) {
-- cgit v1.1

From 8597c14e9b32259cc7cfd752d95fd71e7aaba0ec Mon Sep 17 00:00:00 2001
From: Cameron Zwarich
Date: Mon, 11 Feb 2013 09:24:47 +0000
Subject: Add support for updating LiveIntervals to MachineBasicBlock::SplitCriticalEdge().

This is currently a bit hairier than it needs to be, since depending on where the split block resides the end ListEntry of the split block may be the end ListEntry of the original block or a new entry. Some changes to the SlotIndexes updating should make it possible to eliminate the two cases here.

This also isn't as optimized as it could be. In the future LiveInterval should probably get a flag that indicates whether the LiveInterval is within a single basic block. We could ignore all such intervals when splitting an edge.
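[Editor's note: a minimal sketch of the live-range bookkeeping described above, reusing names from the patch that follows; this is an illustration, not the committed code.]

  // After SlotIndexes renumbering, extending a value's live range so that it
  // flows through the new split block NMBB (the "split block is last" case):
  SlotIndex StartIndex = Indexes->getMBBEndIdx(this);     // old end of 'this'
  SlotIndex EndIndex   = Indexes->getMBBEndIdx(NMBB);     // end of the new block
  VNInfo *VNI = LI.getVNInfoAt(StartIndex.getPrevSlot()); // value live across the split
  LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
  // In the "not last" case, renumbering already extended ranges across NMBB,
  // so ranges that should not cross it are trimmed with LI.removeRange().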
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174870 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/CodeGen/MachineBasicBlock.cpp | 68 +++++++++++++++++++++++++++++++++++++++
lib/CodeGen/PHIElimination.cpp | 2 +-
2 files changed, 69 insertions(+), 1 deletion(-)
(limited to 'lib')
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 7457cd5..7351302 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -15,10 +15,12 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -769,6 +771,72 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
 LV->addNewBlock(NMBB, this, Succ);
 }
+ if (LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>()) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all live
+ // intervals will end prior to the beginning of the new split block. If the
+ // original block was not at the end of the function, all live intervals will
+ // extend to the end of the new split block.
+
+ bool isLastMBB =
+ llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<unsigned, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ unsigned Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef() || !isLastMBB)
+ break;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "PHI sources should be live out of their predecessors.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = succ_begin(),
+ SE = succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI == NMBB ? Succ : *SI;
+ if (LI.liveAt(LIS->getMBBStartIdx(SuccMBB))) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeRange(StartIndex, EndIndex);
+ }
+ }
+ }
+
 if (MachineDominatorTree *MDT = P->getAnalysisIfAvailable<MachineDominatorTree>()) {
 // Update dominator information.
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index bf2b95f..aa84a60 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -129,7 +129,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { // Split critical edges to help the coalescer. This does not yet support // updating LiveIntervals, so we disable it. - if (!DisableEdgeSplitting && LV && !LIS) { + if (!DisableEdgeSplitting && (LV || LIS)) { MachineLoopInfo *MLI = getAnalysisIfAvailable(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Changed |= SplitPHIEdges(MF, *I, MLI); -- cgit v1.1 From b5161863866b64498a7faa20e612c55de4bca6f8 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 11 Feb 2013 09:29:37 +0000 Subject: Make use of DiagnosticType to provide better AArch64 diagnostics. This gives a DiagnosticType to all AsmOperands in sight. This replaces all "invalid operand" diagnostics with something more specific. The messages given should still be sufficiently vague that they're not usually actively misleading when LLVM guesses your instruction incorrectly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174871 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.td | 68 +++++++-- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 168 +++++++++++++++++++++- 2 files changed, 218 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index a9ff02a..adcab89 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -205,11 +205,12 @@ def ATOMIC_CMP_SWAP_I64 // is not optional in that case (but can explicitly be 0), and the // entire suffix can be skipped (e.g. "add sp, x3, x2"). -multiclass extend_operands { +multiclass extend_operands { def _asmoperand : AsmOperandClass { let Name = PREFIX; let RenderMethod = "addRegExtendOperands"; let PredicateMethod = "isRegExtend"; + let DiagnosticType = "AddSubRegExtend" # Diag; } def _operand : Operand, @@ -220,18 +221,19 @@ multiclass extend_operands { } } -defm UXTB : extend_operands<"UXTB">; -defm UXTH : extend_operands<"UXTH">; -defm UXTW : extend_operands<"UXTW">; -defm UXTX : extend_operands<"UXTX">; -defm SXTB : extend_operands<"SXTB">; -defm SXTH : extend_operands<"SXTH">; -defm SXTW : extend_operands<"SXTW">; -defm SXTX : extend_operands<"SXTX">; +defm UXTB : extend_operands<"UXTB", "Small">; +defm UXTH : extend_operands<"UXTH", "Small">; +defm UXTW : extend_operands<"UXTW", "Small">; +defm UXTX : extend_operands<"UXTX", "Large">; +defm SXTB : extend_operands<"SXTB", "Small">; +defm SXTH : extend_operands<"SXTH", "Small">; +defm SXTW : extend_operands<"SXTW", "Small">; +defm SXTX : extend_operands<"SXTX", "Large">; def LSL_extasmoperand : AsmOperandClass { let Name = "RegExtendLSL"; let RenderMethod = "addRegExtendOperands"; + let DiagnosticType = "AddSubRegExtendLarge"; } def LSL_extoperand : Operand { @@ -540,10 +542,14 @@ let ParserMethod = "ParseImmWithLSLOperand", // Derived PredicateMethod fields are different for each def addsubimm_lsl0_asmoperand : AsmOperandClass { let Name = "AddSubImmLSL0"; + // If an error is reported against this operand, instruction could also be a + // register variant. 
+ let DiagnosticType = "AddSubSecondSource"; } def addsubimm_lsl12_asmoperand : AsmOperandClass { let Name = "AddSubImmLSL12"; + let DiagnosticType = "AddSubSecondSource"; } } @@ -689,8 +695,8 @@ multiclass shift_operands { def _asmoperand_i32 : AsmOperandClass { let Name = "Shift" # form # "i32"; let RenderMethod = "addShiftOperands"; - let PredicateMethod - = "isShift"; + let PredicateMethod = "isShift"; + let DiagnosticType = "AddSubRegShift32"; } // Note that the operand type is intentionally i64 because the DAGCombiner @@ -705,8 +711,8 @@ multiclass shift_operands { def _asmoperand_i64 : AsmOperandClass { let Name = "Shift" # form # "i64"; let RenderMethod = "addShiftOperands"; - let PredicateMethod - = "isShift"; + let PredicateMethod = "isShift"; + let DiagnosticType = "AddSubRegShift64"; } def _i64 : Operand, ImmLeaf= 0 && Imm <= 63; }]> { @@ -957,12 +963,14 @@ def uimm5_asmoperand : AsmOperandClass { let Name = "UImm5"; let PredicateMethod = "isUImm<5>"; let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm5"; } def uimm6_asmoperand : AsmOperandClass { let Name = "UImm6"; let PredicateMethod = "isUImm<6>"; let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm6"; } def bitfield32_imm : Operand, @@ -1157,6 +1165,7 @@ def bfx32_width_asmoperand : AsmOperandClass { let Name = "BFX32Width"; let PredicateMethod = "isBitfieldWidth<32>"; let RenderMethod = "addBFXWidthOperands"; + let DiagnosticType = "Width32"; } def bfx32_width : Operand, ImmLeaf { @@ -1168,6 +1177,7 @@ def bfx64_width_asmoperand : AsmOperandClass { let Name = "BFX64Width"; let PredicateMethod = "isBitfieldWidth<64>"; let RenderMethod = "addBFXWidthOperands"; + let DiagnosticType = "Width64"; } def bfx64_width : Operand { @@ -1235,6 +1245,7 @@ def bfi32_lsb_asmoperand : AsmOperandClass { let Name = "BFI32LSB"; let PredicateMethod = "isUImm<5>"; let RenderMethod = "addBFILSBOperands<32>"; + let DiagnosticType = "UImm5"; } def bfi32_lsb : Operand, @@ -1247,6 +1258,7 @@ def bfi64_lsb_asmoperand : AsmOperandClass { let Name = "BFI64LSB"; let PredicateMethod = "isUImm<6>"; let RenderMethod = "addBFILSBOperands<64>"; + let DiagnosticType = "UImm6"; } def bfi64_lsb : Operand, @@ -1262,6 +1274,7 @@ def bfi32_width_asmoperand : AsmOperandClass { let Name = "BFI32Width"; let PredicateMethod = "isBitfieldWidth<32>"; let RenderMethod = "addBFIWidthOperands"; + let DiagnosticType = "Width32"; } def bfi32_width : Operand, @@ -1274,6 +1287,7 @@ def bfi64_width_asmoperand : AsmOperandClass { let Name = "BFI64Width"; let PredicateMethod = "isBitfieldWidth<64>"; let RenderMethod = "addBFIWidthOperands"; + let DiagnosticType = "Width64"; } def bfi64_width : Operand, @@ -1329,6 +1343,7 @@ class label_asmoperand : AsmOperandClass { let Name = "Label" # width # "_" # scale; let PredicateMethod = "isLabel<" # width # "," # scale # ">"; let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; + let DiagnosticType = "Label"; } def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; @@ -1375,6 +1390,7 @@ defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf, ImmLeaf { @@ -1420,6 +1437,7 @@ def cond_code_op_asmoperand : AsmOperandClass { let RenderMethod = "addCondCodeOperands"; let PredicateMethod = "isCondCode"; let ParserMethod = "ParseCondCodeOperand"; + let DiagnosticType = "CondCode"; } def cond_code_op : Operand { @@ -1471,6 +1489,7 @@ def inv_cond_code_op_asmoperand : AsmOperandClass { let RenderMethod = "addInvCondCodeOperands"; let PredicateMethod = "isCondCode"; let ParserMethod = "ParseCondCodeOperand"; 
+ let DiagnosticType = "CondCode"; } def inv_cond_code_op : Operand { @@ -1836,6 +1855,7 @@ def uimm16_asmoperand : AsmOperandClass { let Name = "UImm16"; let PredicateMethod = "isUImm<16>"; let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm16"; } def uimm16 : Operand { @@ -1902,6 +1922,7 @@ def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB), def fpzero_asmoperand : AsmOperandClass { let Name = "FPZero"; let ParserMethod = "ParseFPImmOperand"; + let DiagnosticType = "FPZero"; } def fpz32 : Operand, @@ -2139,6 +2160,7 @@ def fixedpos_asmoperand_i32 : AsmOperandClass { let Name = "CVTFixedPos32"; let RenderMethod = "addCVTFixedPosOperands"; let PredicateMethod = "isCVTFixedPos<32>"; + let DiagnosticType = "CVTFixedPos32"; } // Also encoded as "64 - " but #1-#64 allowed. @@ -2146,6 +2168,7 @@ def fixedpos_asmoperand_i64 : AsmOperandClass { let Name = "CVTFixedPos64"; let RenderMethod = "addCVTFixedPosOperands"; let PredicateMethod = "isCVTFixedPos<64>"; + let DiagnosticType = "CVTFixedPos64"; } // We need the cartesian product of f32/f64 i32/i64 operands for @@ -2301,6 +2324,7 @@ def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>; def lane1_asmoperand : AsmOperandClass { let Name = "Lane1"; let RenderMethod = "addImmOperands"; + let DiagnosticType = "Lane1"; } def lane1 : Operand { @@ -2332,6 +2356,7 @@ def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", def fpimm_asmoperand : AsmOperandClass { let Name = "FMOVImm"; let ParserMethod = "ParseFPImmOperand"; + let DiagnosticType = "FPImm"; } // The MCOperand for these instructions are the encoded 8-bit values. @@ -2372,6 +2397,7 @@ def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; def ldrlit_label_asmoperand : AsmOperandClass { let Name = "LoadLitLabel"; let RenderMethod = "addLabelOperands<19, 4>"; + let DiagnosticType = "Label"; } def ldrlit_label : Operand { @@ -2392,6 +2418,7 @@ multiclass namedimm { let PredicateMethod = "isUImm"; let RenderMethod = "addImmOperands"; let ParserMethod = "ParseNamedImmOperand<" # mapper # ">"; + let DiagnosticType = "NamedImm_" # prefix; } def _op : Operand { @@ -2461,6 +2488,7 @@ def GPR64xsp0_asmoperand : AsmOperandClass { let PredicateMethod = "isWrappedReg"; let RenderMethod = "addRegOperands"; let ParserMethod = "ParseLSXAddressOperand"; + // Diagnostics are provided by ParserMethod } def GPR64xsp0 : RegisterOperand { @@ -2738,6 +2766,7 @@ multiclass offsets_uimm12 { let Name = "OffsetUImm12_" # MemSize; let PredicateMethod = "isOffsetUImm12<" # MemSize # ">"; let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreUImm12_" # MemSize; } // Pattern is really no more than an ImmLeaf, but predicated on MemSize which @@ -2772,6 +2801,7 @@ def simm9_asmoperand : AsmOperandClass { let Name = "SImm9"; let PredicateMethod = "isSImm<9>"; let RenderMethod = "addSImmOperands<9>"; + let DiagnosticType = "LoadStoreSImm9"; } def simm9 : Operand, @@ -2804,6 +2834,7 @@ multiclass regexts"; let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize; } def regext : Operand { @@ -3377,6 +3408,7 @@ multiclass offsets_simm7 { let Name = "SImm7_Scaled" # MemSize; let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreSImm7_" # MemSize; } def simm7 : Operand { @@ -3528,6 +3560,7 @@ multiclass logical_imm_operands"; let RenderMethod = "addLogicalImmOperands<" # size # ">"; + let DiagnosticType 
= "LogicalSecondSource"; } def _operand @@ -3819,8 +3852,8 @@ multiclass movw_operands { let Name = instname # width # "Shifted" # shift; let PredicateMethod = "is" # instname # width # "Imm"; let RenderMethod = "addMoveWideImmOperands"; - let ParserMethod = "ParseImmWithLSLOperand"; + let DiagnosticType = "MOVWUImm16"; } def _imm : Operand { @@ -3935,6 +3968,7 @@ def adr_label : Operand { def adrp_label_asmoperand : AsmOperandClass { let Name = "AdrpLabel"; let RenderMethod = "addLabelOperands<21, 4096>"; + let DiagnosticType = "Label"; } def adrp_label : Operand { @@ -3965,6 +3999,7 @@ def uimm3_asmoperand : AsmOperandClass { let Name = "UImm3"; let PredicateMethod = "isUImm<3>"; let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm3"; } def uimm3 : Operand { @@ -3976,6 +4011,7 @@ def uimm7_asmoperand : AsmOperandClass { let Name = "UImm7"; let PredicateMethod = "isUImm<7>"; let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm7"; } def uimm7 : Operand { @@ -4011,6 +4047,7 @@ defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; def mrs_asmoperand : AsmOperandClass { let Name = "MRS"; let ParserMethod = "ParseSysRegOperand"; + let DiagnosticType = "MRS"; } def mrs_op : Operand { @@ -4027,6 +4064,7 @@ def msr_asmoperand : AsmOperandClass { // AArch64Operand rather than an immediate. The overlap is small enough that // it could be resolved with hackery now, but who can say in future? let ParserMethod = "ParseSysRegOperand"; + let DiagnosticType = "MSR"; } def msr_op : Operand { @@ -4039,6 +4077,7 @@ def pstate_asmoperand : AsmOperandClass { let Name = "MSRPState"; // See comment above about parser. let ParserMethod = "ParseSysRegOperand"; + let DiagnosticType = "MSR"; } def pstate_op : Operand { @@ -4054,6 +4093,7 @@ def CRx_asmoperand : AsmOperandClass { let PredicateMethod = "isUImm<4>"; let RenderMethod = "addImmOperands"; let ParserMethod = "ParseCRxOperand"; + // Diagnostics are handled in all cases by ParseCRxOperand. 
} def CRx : Operand { diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 69e4fc2..bab7d84 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -43,6 +43,12 @@ class AArch64AsmParser : public MCTargetAsmParser { #include "AArch64GenAsmMatcher.inc" public: + enum AArch64MatchResultTy { + Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "AArch64GenAsmMatcher.inc" + }; + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); @@ -1871,7 +1877,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); Parser.EatToEndOfStatement(); - return Error(Loc, ""); + return Error(Loc, "expected comma before next operand"); } // Eat the EndOfStatement @@ -1946,6 +1952,10 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned MatchResult; MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + + if (ErrorInfo != ~0U && ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + switch (MatchResult) { default: break; case Match_Success: @@ -1960,9 +1970,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0U) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -1971,6 +1978,159 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_MnemonicFail: return Error(IDLoc, "invalid instruction"); + + case Match_AddSubRegExtendSmall: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegExtendLarge: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegShift32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); + case Match_AddSubRegShift64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); + case Match_AddSubSecondSource: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register, symbol or integer in range [0, 4095]"); + case Match_CVTFixedPos32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 32]"); + case Match_CVTFixedPos64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 64]"); + case Match_CondCode: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected AArch64 condition code"); + case Match_FPImm: + // Any situation which allows a nontrivial floating-point constant also + // allows a register. 
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register or floating-point constant"); + case Match_FPZero: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected floating-point constant #0.0"); + case Match_Label: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected label or encodable integer pc offset"); + case Match_Lane1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected lane specifier '[1]'"); + case Match_LoadStoreExtend32_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0"); + case Match_LoadStoreExtend32_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); + case Match_LoadStoreExtend32_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); + case Match_LoadStoreExtend32_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); + case Match_LoadStoreExtend32_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtw' with optional shift of #0 or #4"); + case Match_LoadStoreExtend64_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0"); + case Match_LoadStoreExtend64_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); + case Match_LoadStoreExtend64_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); + case Match_LoadStoreExtend64_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); + case Match_LoadStoreExtend64_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); + case Match_LoadStoreSImm7_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 4 in range [-256, 252]"); + case Match_LoadStoreSImm7_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 8 in range [-512, 508]"); + case Match_LoadStoreSImm7_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 16 in range [-1024, 1016]"); + case Match_LoadStoreSImm9: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [-256, 255]"); + case Match_LoadStoreUImm12_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 4095]"); + case Match_LoadStoreUImm12_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 8190]"); + case Match_LoadStoreUImm12_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 16380]"); + case Match_LoadStoreUImm12_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 32760]"); + case Match_LoadStoreUImm12_16: + return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 65520]");
+ case Match_LogicalSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or logical immediate");
+ case Match_MOVWUImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected relocated symbol or integer in range [0, 65535]");
+ case Match_MRS:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected readable system register");
+ case Match_MSR:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected writable system register or pstate");
+ case Match_NamedImm_at:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]");
+ case Match_NamedImm_dbarrier:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or symbolic barrier operand");
+ case Match_NamedImm_dc:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'dc' operand");
+ case Match_NamedImm_ic:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'");
+ case Match_NamedImm_isb:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or 'sy'");
+ case Match_NamedImm_prefetch:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected prefetch hint: p(ld|st|i)l[123](strm|keep)");
+ case Match_NamedImm_tlbi:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected translation buffer invalidation operand");
+ case Match_UImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 65535]");
+ case Match_UImm3:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_UImm4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_UImm5:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_UImm6:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
+ case Match_UImm7:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 127]");
+ case Match_Width32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 31]");
+ case Match_Width64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 63]");
 }
 llvm_unreachable("Implement any new match types added!");
-- cgit v1.1

From 77b1c9cf57849b3f9a4e8bae47cd5954d20a7e11 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Mon, 11 Feb 2013 12:32:18 +0000
Subject: AArch64: Simplify logic in deciding whether bfi is valid

Previous code had a confusing comment which was mostly an implementation detail. This condition corresponds to "lsb up to register width" and "width not ridiculous".
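[Editor's note: a worked example of the simplified condition, with the field encodings stated as assumptions inferred from the surrounding code: for a 32-bit bitfield insert at lsb L with width W, ImmR is roughly (32 - L) % 32 and ImmS is W - 1. The special case the removed comment mentions, "sbfiz w0, w0, #0, #1", has ImmR == 0 and stays valid, while an insert at lsb 28 with width 8 gives ImmR = 4, ImmS = 7, and ImmS >= ImmR correctly reports "requested insert overflows register".]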
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174877 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
(limited to 'lib')
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index bab7d84..1cdeafb 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1725,12 +1725,7 @@ validateInstruction(MCInst &Inst,
 int64_t ImmR = Inst.getOperand(ImmOps).getImm();
 int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
- if (ImmR == 0) {
- // Bitfield inserts are preferred disassembly if ImmS < ImmR. However,
- // there is this one case where insert is valid syntax but the bfx
- // disassembly should be used: e.g. "sbfiz w0, w0, #0, #1".
- return false;
- } else if (ImmS >= ImmR) {
+ if (ImmR != 0 && ImmS >= ImmR) {
 return Error(Operands[4]->getStartLoc(),
 "requested insert overflows register");
 }
-- cgit v1.1

From 716d26b2ce310e852a78b25ec94db031378133bb Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Mon, 11 Feb 2013 14:25:52 +0000
Subject: AArch64: fix build on some MSVC versions

This does two things:

It removes a call to abs() which may have a "long long" parameter on Windows, which is not necessarily available in C++03.

It also corrects the signedness of Amount, which was relying on implementation-defined conversions previously. Code was already tested (albeit in an implementation-defined way) so no extra tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174885 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/AArch64/AArch64RegisterInfo.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'lib')
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index da45685..ee34d76 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -165,8 +165,8 @@ AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
 if (!TFI->hasReservedCallFrame(MF)) {
 unsigned Align = TFI->getStackAlignment();
- uint64_t Amount = MI->getOperand(0).getImm();
- Amount = (Amount + Align - 1)/Align * Align;
+ int64_t Amount = MI->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
 if (!IsDestroy) Amount = -Amount;
 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
@@ -177,7 +177,7 @@ AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
 // because there's no guaranteed temporary register available. Mostly call
 // frames will be allocated at the start of a function so this is OK, but
 // it is a limitation that needs dealing with.
- assert(abs(Amount) < 0xfff && "call frame too large");
+ assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
 emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
 }
 } else if (CalleePopAmount != 0) {
-- cgit v1.1

From 117de489a0d5f4ff280fb173fe45bd5ce8514d93 Mon Sep 17 00:00:00 2001
From: Kostya Serebryany
Date: Mon, 11 Feb 2013 14:36:01 +0000
Subject: [asan] added a flag -mllvm asan-short-64bit-mapping-offset=1 (0 by default)

This flag makes asan use a small (<2G) offset for 64-bit asan shadow mapping. On x86_64 this saves us a register, thus achieving ~2/3 of the zero-base-offset's benefits in both performance and code size.

Thanks Jakub Jelinek for the idea.
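[Editor's note: a minimal sketch of the mapping this flag selects, using the constants from the patch below; the helper name is invented for illustration.]

  #include <cstdint>

  // With the default scale of 3 and the short offset, the shadow address is
  // (Addr >> 3) + 0x7FFF8000. The constant fits in a 32-bit immediate, so
  // x86_64 can fold it into the addressing computation instead of
  // materializing 1ULL << 44 in a spare register.
  static inline std::uintptr_t shadowFor(std::uintptr_t Addr) {
    return (Addr >> 3) + 0x7FFF8000UL; // kDefaultShort64bitShadowOffset
  }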
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174886 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Instrumentation/AddressSanitizer.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
(limited to 'lib')
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 0474eb5..c5f77ec 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -53,6 +53,7 @@ using namespace llvm;
 static const uint64_t kDefaultShadowScale = 3;
 static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
 static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
 static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
 static const size_t kMaxStackMallocSize = 1 << 16; // 64K
@@ -133,6 +134,9 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale",
 cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
 static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
 cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+static cl::opt<bool> ClShort64BitOffset("asan-short-64bit-mapping-offset",
+ cl::desc("Use short immediate constant as the mapping offset for 64bit"),
+ cl::Hidden, cl::init(false));
 // Optimization flags. Not user visible, used mostly for testing
 // and benchmarking the tool.
@@ -205,12 +209,14 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
 // OR-ing shadow offset if more efficient (at least on x86),
 // but on ppc64 we have to use add since the shadow offset is not neccesary
 // 1/8-th of the address space.
- Mapping.OrShadowOffset = !IsPPC64;
+ Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
 Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
 (LongSize == 32 ? kDefaultShadowOffset32 :
 IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
- if (ClMappingOffsetLog >= 0) {
+ if (!ZeroBaseShadow && ClShort64BitOffset && LongSize == 64) {
+ Mapping.Offset = kDefaultShort64bitShadowOffset;
+ }
 if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) {
 // Zero offset log is the special case.
 Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog;
 }
-- cgit v1.1

From 311ea66db107917789c00f2ae11ec6b98eb64e59 Mon Sep 17 00:00:00 2001
From: Michel Danzer
Date: Mon, 11 Feb 2013 15:58:21 +0000
Subject: R600/SI: Use V_ADD_F32 instead of V_MOV_B32 for clamp/neg/abs modifiers.

The modifiers don't seem to have any effect with V_MOV_B32, supposedly it's meant to just move bits untouched.

Fixes 46 piglit tests with radeonsi, though unfortunately 11 of those had just regressed because they started using the clamp modifier.

NOTE: This is a candidate for the Mesa stable branch.
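[Editor's note: the mechanics, illustrated with schematic assembly that is not taken from the patch: V_MOV_B32 copies bits untouched, so output modifiers attached to it are silently ignored, while routing the value through the ALU as an add with the literal 0 makes the modifiers take effect.]

  v_mov_b32 v0, v1 clamp       ; clamp is ignored, v0 = v1
  v_add_f32 v0, v1, 0.0 clamp  ; v0 = clamp(v1 + 0.0) = clamp(v1)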
Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174890 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index afafa8c..ceab692 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -74,13 +74,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; case AMDGPU::CLAMP_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) .addImm(0) // ABS .addImm(1) // CLAMP .addImm(0) // OMOD @@ -89,13 +87,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( break; case AMDGPU::FABS_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) .addImm(1) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD @@ -104,13 +100,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( break; case AMDGPU::FNEG_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) .addImm(0) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD -- cgit v1.1 From b1a82589339fed148c12b052d30861a539552f1a Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 17:19:34 +0000 Subject: BBVectorize: Eliminate one more restricted linear search This eliminates one more linear search over a range of std::multimap entries. This gives a 22% speedup on the csa.ll test case from PR15222. No functionality change intended. 
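[Editor's note: the shape of the change, as a minimal sketch assembled from lines of the patch below; the point is replacing repeated linear scans of a std::multimap range with a single hash-set membership test.]

  // Built once per block, mirroring the multimap's contents:
  DenseSet<ValuePair> CandidatePairsSet;
  for (std::multimap<Value *, Value *>::iterator I = CandidatePairs.begin(),
       E = CandidatePairs.end(); I != E; ++I)
    CandidatePairsSet.insert(*I);

  // Later queries become one hash lookup instead of walking an equal_range():
  if (CandidatePairsSet.count(ValuePair(k->second)))
    ...; // the child pair is still a candidate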
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174893 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 58 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 9da4c37..bf8fca0 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -240,6 +240,7 @@ namespace { }; void computeConnectedPairs(std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes); @@ -250,6 +251,7 @@ namespace { DenseSet &PairableInstUsers); void choosePairs(std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, DenseSet &FixedOrderPairs, @@ -310,6 +312,7 @@ namespace { void buildInitialTreeFor( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseSet &PairableInstUsers, @@ -318,6 +321,7 @@ namespace { void findBestTreeFor( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, DenseSet &FixedOrderPairs, @@ -704,6 +708,12 @@ namespace { PairableInsts, NonPow2Len); if (PairableInsts.empty()) continue; + // Build the candidate pair set for faster lookups. + DenseSet CandidatePairsSet; + for (std::multimap::iterator I = CandidatePairs.begin(), + E = CandidatePairs.end(); I != E; ++I) + CandidatePairsSet.insert(*I); + // Now we have a map of all of the pairable instructions and we need to // select the best possible pairing. A good pairing is one such that the // users of the pair are also paired. This defines a (directed) forest @@ -715,8 +725,8 @@ namespace { std::multimap ConnectedPairs, ConnectedPairDeps; DenseMap PairConnectionTypes; - computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs, - PairConnectionTypes); + computeConnectedPairs(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, PairConnectionTypes); if (ConnectedPairs.empty()) continue; for (std::multimap::iterator @@ -736,7 +746,8 @@ namespace { // variables. DenseMap ChosenPairs; - choosePairs(CandidatePairs, CandidatePairCostSavings, + choosePairs(CandidatePairs, CandidatePairsSet, + CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, PairableInstUsers, ChosenPairs); @@ -1332,22 +1343,19 @@ namespace { // of the second pair. 
void BBVectorize::computeConnectedPairs( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes) { - DenseSet CandidatePairsSet; - for (std::multimap::iterator I = CandidatePairs.begin(), - E = CandidatePairs.end(); I != E; ++I) - CandidatePairsSet.insert(*I); - for (std::vector::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); for (std::multimap::iterator P = choiceRange.first; P != choiceRange.second; ++P) - computePairsConnectedTo(CandidatePairs, CandidatePairsSet, PairableInsts, - ConnectedPairs, PairConnectionTypes, *P); + computePairsConnectedTo(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, + PairConnectionTypes, *P); } DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() @@ -1464,6 +1472,7 @@ namespace { // pair J at the root. void BBVectorize::buildInitialTreeFor( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseSet &PairableInstUsers, @@ -1485,18 +1494,7 @@ namespace { for (std::multimap::iterator k = qtRange.first; k != qtRange.second; ++k) { // Make sure that this child pair is still a candidate: - bool IsStillCand = false; - VPIteratorPair checkRange = - CandidatePairs.equal_range(k->second.first); - for (std::multimap::iterator m = checkRange.first; - m != checkRange.second; ++m) { - if (m->second == k->second.second) { - IsStillCand = true; - break; - } - } - - if (IsStillCand) { + if (CandidatePairsSet.count(ValuePair(k->second))) { DenseMap::iterator C = Tree.find(k->second); if (C == Tree.end()) { size_t d = getDepthFactor(k->second.first); @@ -1686,6 +1684,7 @@ namespace { // pairs, given the choice of root pairs as an iterator range. void BBVectorize::findBestTreeFor( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, DenseSet &FixedOrderPairs, @@ -1725,7 +1724,8 @@ namespace { continue; DenseMap Tree; - buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, + buildInitialTreeFor(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, PairableInstUsers, ChosenPairs, Tree, *J); // Because we'll keep the child with the largest depth, the largest @@ -1745,7 +1745,8 @@ namespace { DenseSet PrunedTree; pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, - PairableInstUsers, PairableInstUserMap, PairableInstUserPairSet, + PairableInstUsers, PairableInstUserMap, + PairableInstUserPairSet, ChosenPairs, Tree, PrunedTree, *J, UseCycleCheck); int EffSize = 0; @@ -2061,6 +2062,7 @@ namespace { // that will be fused into vector instructions. void BBVectorize::choosePairs( std::multimap &CandidatePairs, + DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, DenseSet &FixedOrderPairs, @@ -2085,7 +2087,8 @@ namespace { size_t BestMaxDepth = 0; int BestEffSize = 0; DenseSet BestTree; - findBestTreeFor(CandidatePairs, CandidatePairCostSavings, + findBestTreeFor(CandidatePairs, CandidatePairsSet, + CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, PairableInstUsers, PairableInstUserMap, @@ -2115,9 +2118,10 @@ namespace { K->second == S->second || K->first == S->second) { // Don't remove the actual pair chosen so that it can be used // in subsequent tree selections. 
- if (!(K->first == S->first && K->second == S->second)) + if (!(K->first == S->first && K->second == S->second)) { + CandidatePairsSet.erase(*K); CandidatePairs.erase(K++); - else + } else ++K; } else { ++K; -- cgit v1.1 From 86494d7991050b4ffbfdcf1d15e1ad5e3c28f07b Mon Sep 17 00:00:00 2001 From: Kay Tiong Khoo Date: Mon, 11 Feb 2013 19:46:36 +0000 Subject: *fixed disassembly of some i386 system insts with intel syntax *added file for test cases for i386 intel syntax git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174900 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSystem.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index ea716bf..3caa1b5 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -352,11 +352,11 @@ def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), // Descriptor-table support instructions def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; + "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt\t$dst", [], IIC_SGDT>, TB; def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; + "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), "sidt\t$dst", []>, TB; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), @@ -374,11 +374,11 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), "sldt{q}\t$dst", [], IIC_SLDT>, TB; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; + "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt\t$src", [], IIC_LGDT>, TB; def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; + "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), "lidt\t$src", [], IIC_LIDT>, TB; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), -- cgit v1.1 From ce55d91ec97b1227cb4cb94354c89452d787515e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 11 Feb 2013 20:04:29 +0000 Subject: Implement HexagonInstrInfo::analyzeCompare. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174901 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.cpp | 82 +++++++++++++++++++++++++++++++++ lib/Target/Hexagon/HexagonInstrInfo.h | 4 ++ 2 files changed, 86 insertions(+) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 2cb77dd..c067465 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -305,6 +305,88 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { } +/// \brief For a comparison instruction, return the source registers in +/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. 
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + // Set mask and the first source register. + switch (Opc) { + case Hexagon::CMPEHexagon4rr: + case Hexagon::CMPEQri: + case Hexagon::CMPEQrr: + case Hexagon::CMPGT64rr: + case Hexagon::CMPGTU64rr: + case Hexagon::CMPGTUri: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTri: + case Hexagon::CMPGTrr: + case Hexagon::CMPLTUrr: + case Hexagon::CMPLTrr: + SrcReg = MI->getOperand(1).getReg(); + Mask = ~0; + break; + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPbEQrr_sbsb_V4: + case Hexagon::CMPbEQrr_ubub_V4: + case Hexagon::CMPbGTUri_V4: + case Hexagon::CMPbGTUrr_V4: + case Hexagon::CMPbGTrr_V4: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFF; + break; + case Hexagon::CMPhEQri_V4: + case Hexagon::CMPhEQrr_shl_V4: + case Hexagon::CMPhEQrr_xor_V4: + case Hexagon::CMPhGTUri_V4: + case Hexagon::CMPhGTUrr_V4: + case Hexagon::CMPhGTrr_shl_V4: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFFFF; + break; + } + + // Set the value/second source register. + switch (Opc) { + case Hexagon::CMPEHexagon4rr: + case Hexagon::CMPEQrr: + case Hexagon::CMPGT64rr: + case Hexagon::CMPGTU64rr: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTrr: + case Hexagon::CMPbEQrr_sbsb_V4: + case Hexagon::CMPbEQrr_ubub_V4: + case Hexagon::CMPbGTUrr_V4: + case Hexagon::CMPbGTrr_V4: + case Hexagon::CMPhEQrr_shl_V4: + case Hexagon::CMPhEQrr_xor_V4: + case Hexagon::CMPhGTUrr_V4: + case Hexagon::CMPhGTrr_shl_V4: + case Hexagon::CMPLTUrr: + case Hexagon::CMPLTrr: + SrcReg2 = MI->getOperand(2).getReg(); + return true; + + case Hexagon::CMPEQri: + case Hexagon::CMPGTUri: + case Hexagon::CMPGTri: + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPbGTUri_V4: + case Hexagon::CMPhEQri_V4: + case Hexagon::CMPhGTUri_V4: + SrcReg2 = 0; + Value = MI->getOperand(2).getImm(); + return true; + } + + return false; +} + + void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 29e3eb1..4e36dfb 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -66,6 +66,10 @@ public: const SmallVectorImpl &Cond, DebugLoc DL) const; + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, -- cgit v1.1 From 71490fa946f750fb3afe7228a32d31d401d4c1d8 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 11 Feb 2013 21:37:55 +0000 Subject: Extend Hexagon hardware loop generation to handle various additional cases: - variety of compare instructions, - loops with no preheader, - arbitrary lower and upper bounds. 
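[Editor's note: the canonical loop shape the extended pass recognizes, reproduced from the findInductionRegister() comment in the patch below; start value, bound, and step no longer need to be constants or zero-based.]

  loop:
    R = phi ..., [ R.next, LatchBlock ]   ; induction register
    R.next = R + #bump
    if (R.next < #N) goto loop            ; compare feeding the loop branch

From (start value, bound #N, step #bump) and the comparison kind, the pass computes a trip count, inserting the computation into the loop preheader when it is not a compile-time constant.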
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174904 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/CMakeLists.txt | 1 + lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 183 +++ lib/Target/Hexagon/HexagonHardwareLoops.cpp | 1665 +++++++++++++++++++++------ lib/Target/Hexagon/HexagonInstrInfo.cpp | 3 + 4 files changed, 1470 insertions(+), 382 deletions(-) create mode 100644 lib/Target/Hexagon/HexagonFixupHwLoops.cpp (limited to 'lib') diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index aee43ba..b5b887e 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFrameLowering.cpp HexagonHardwareLoops.cpp + HexagonFixupHwLoops.cpp HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonInstrInfo.cpp diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp new file mode 100644 index 0000000..240cc95 --- /dev/null +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -0,0 +1,183 @@ +//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// The loop start address in the LOOPn instruction is encoded as a distance +// from the LOOPn instruction itself. If the start address is too far from +// the LOOPn instruction, the loop needs to be set up manually, i.e. via +// direct transfers to SAn and LCn. +// This pass will identify and convert such LOOPn instructions to a proper +// form. +//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/PassSupport.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonFixupHwLoopsPass(PassRegistry&); +} + +namespace { + struct HexagonFixupHwLoops : public MachineFunctionPass { + public: + static char ID; + + HexagonFixupHwLoops() : MachineFunctionPass(ID) { + initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// \brief Maximum distance between the loop instr and the basic block. + /// Just an estimate. + static const unsigned MAX_LOOP_DISTANCE = 200; + + /// \brief Check the offset between each loop instruction and + /// the loop basic block to determine if we can use the LOOP instruction + /// or if we need to set the LC/SA registers explicitly. + bool fixupLoopInstrs(MachineFunction &MF); + + /// \brief Add the instruction to set the LC and SA registers explicitly. 
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map for each basic block to its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ unsigned Dist = Sub > 0 ? Sub : -Sub;
+ if (Dist > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+
+/// \brief Convert a loop instruction to a sequence of instructions that
+/// set the LC0 and SA0 registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) + .addImm(MII->getOperand(1).getImm()); + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + .addReg(Scratch); + } + // Then, set the SA0 with the loop start address. + BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) + .addMBB(MII->getOperand(0).getMBB()); + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0) + .addReg(Scratch); +} diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 2a00a9f..62aed13 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -27,9 +27,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hwloops" -#include "Hexagon.h" -#include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -37,79 +35,194 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + #include +#include using namespace llvm; +#ifndef NDEBUG +static cl::opt HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); +#endif + STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +namespace llvm { + void initializeHexagonHardwareLoopsPass(PassRegistry&); +} + namespace { class CountValue; struct HexagonHardwareLoops : public MachineFunctionPass { - MachineLoopInfo *MLI; - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + const HexagonTargetMachine *TM; + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; +#ifndef NDEBUG + static int Counter; +#endif public: - static char ID; // Pass identification, replacement for typeid + static char ID; - HexagonHardwareLoops() : MachineFunctionPass(ID) {} + HexagonHardwareLoops() : MachineFunctionPass(ID) { + initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); const char *getPassName() const { return "Hexagon Hardware Loops"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addRequired(); - AU.addPreserved(); AU.addRequired(); - AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } private: - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable. - /// Should be defined in MachineLoop. Based upon version in class Loop. - const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const; - - /// getTripCount - Return a loop-invariant LLVM register indicating the - /// number of times the loop will be executed. If the trip-count cannot - /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L) const; - - /// isInductionOperation - Return true if the instruction matches the - /// pattern for an opertion that defines an induction variable. 
- bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; + /// Kinds of comparisons in the compare instructions. + struct Comparison { + enum Kind { + EQ = 0x01, + NE = 0x02, + L = 0x04, // Less-than property. + G = 0x08, // Greater-than property. + U = 0x40, // Unsigned property. + LTs = L, + LEs = L | EQ, + GTs = G, + GEs = G | EQ, + LTu = L | U, + LEu = L | EQ | U, + GTu = G | U, + GEu = G | EQ | U + }; + + static Kind getSwappedComparison(Kind Cmp) { + assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); + if ((Cmp & L) || (Cmp & G)) + return (Kind)(Cmp ^ (L|G)); + return Cmp; + } + }; - /// isInvalidOperation - Return true if the instruction is not valid within - /// a hardware loop. + /// \brief Find the register that contains the loop controlling + /// induction variable. + /// If successful, it will return true and set the \p Reg, \p IVBump + /// and \p IVOp arguments. Otherwise it will return false. + /// The returned induction register is the register R that follows the + /// following induction pattern: + /// loop: + /// R = phi ..., [ R.next, LatchBlock ] + /// R.next = R + #bump + /// if (R.next < #N) goto loop + /// IVBump is the immediate value added to R, and IVOp is the instruction + /// "R.next = R + #bump". + bool findInductionRegister(MachineLoop *L, unsigned &Reg, + int64_t &IVBump, MachineInstr *&IVOp) const; + + /// \brief Analyze the statements in a loop to determine if the loop + /// has a computable trip count and, if so, return a value that represents + /// the trip count expression. + CountValue *getLoopTripCount(MachineLoop *L, + SmallVector &OldInsts); + + /// \brief Return the expression that represents the number of times + /// a loop iterates. The function takes the operands that represent the + /// loop start value, loop end value, and induction value. Based upon + /// these operands, the function attempts to compute the trip count. + /// If the trip count is not directly available (as an immediate value, + /// or a register), the function will attempt to insert computation of it + /// to the loop's preheader. + CountValue *computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const; + + /// \brief Return true if the instruction is not valid within a hardware + /// loop. bool isInvalidLoopOperation(const MachineInstr *MI) const; - /// containsInavlidInstruction - Return true if the loop contains an - /// instruction that inhibits using the hardware loop. + /// \brief Return true if the loop contains an instruction that inhibits + /// using the hardware loop. bool containsInvalidInstruction(MachineLoop *L) const; - /// converToHardwareLoop - Given a loop, check if we can convert it to a - /// hardware loop. If so, then perform the conversion and return true. + /// \brief Given a loop, check if we can convert it to a hardware loop. + /// If so, then perform the conversion and return true. bool convertToHardwareLoop(MachineLoop *L); + /// \brief Return true if the instruction is now dead. + bool isDead(const MachineInstr *MI, + SmallVector &DeadPhis) const; + + /// \brief Remove the instruction if it is now dead. + void removeIfDead(MachineInstr *MI); + + /// \brief Make sure that the "bump" instruction executes before the + /// compare. We need that for the IV fixup, so that the compare + /// instruction would not use a bumped value that has not yet been + /// defined. 
If the instructions are out of order, try to reorder them. + bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); + + /// \brief Get the instruction that loads an immediate value into \p R, + /// or 0 if such an instruction does not exist. + MachineInstr *defWithImmediate(unsigned R); + + /// \brief Get the immediate value referenced to by \p MO, either for + /// immediate operands, or for register operands, where the register + /// was defined with an immediate value. + int64_t getImmediate(MachineOperand &MO); + + /// \brief Reset the given machine operand to now refer to a new immediate + /// value. Assumes that the operand was already referencing an immediate + /// value, either directly, or via a register. + void setImmediate(MachineOperand &MO, int64_t Val); + + /// \brief Fix the data flow of the induction varible. + /// The desired flow is: phi ---> bump -+-> comparison-in-latch. + /// | + /// +-> back to phi + /// where "bump" is the increment of the induction variable: + /// iv = iv + #const. + /// Due to some prior code transformations, the actual flow may look + /// like this: + /// phi -+-> bump ---> back to phi + /// | + /// +-> comparison-in-latch (against upper_bound-bump), + /// i.e. the comparison that controls the loop execution may be using + /// the value of the induction variable from before the increment. + /// + /// Return true if the loop's flow is the desired one (i.e. it's + /// either been fixed, or no fixing was necessary). + /// Otherwise, return false. This can happen if the induction variable + /// couldn't be identified, or if the value in the latch's comparison + /// cannot be adjusted to reflect the post-bump value. + bool fixupInductionVariable(MachineLoop *L); + + /// \brief Given a loop, if it does not have a preheader, create one. + /// Return the block that is the preheader. + MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); }; char HexagonHardwareLoops::ID = 0; +#ifndef NDEBUG + int HexagonHardwareLoops::Counter = 0; +#endif - - // CountValue class - Abstraction for a trip count of a loop. A - // smaller vesrsion of the MachineOperand class without the concerns - // of changing the operand representation. + /// \brief Abstraction for a trip count of a loop. A smaller vesrsion + /// of the MachineOperand class without the concerns of changing the + /// operand representation. 
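+  /// A CountValue holds either a register (with an optional subregister)
+  /// or an unsigned immediate; exactly one of the two is meaningful for
+  /// any given instance.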
class CountValue { public: enum CountValueType { @@ -119,101 +232,62 @@ namespace { private: CountValueType Kind; union Values { - unsigned RegNum; - int64_t ImmVal; - Values(unsigned r) : RegNum(r) {} - Values(int64_t i) : ImmVal(i) {} + struct { + unsigned Reg; + unsigned Sub; + } R; + unsigned ImmVal; } Contents; - bool isNegative; public: - CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), - isNegative(neg) {} - explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), - isNegative(i < 0) {} - CountValueType getType() const { return Kind; } + explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { + Kind = t; + if (Kind == CV_Register) { + Contents.R.Reg = v; + Contents.R.Sub = u; + } else { + Contents.ImmVal = v; + } + } bool isReg() const { return Kind == CV_Register; } bool isImm() const { return Kind == CV_Immediate; } - bool isNeg() const { return isNegative; } unsigned getReg() const { assert(isReg() && "Wrong CountValue accessor"); - return Contents.RegNum; + return Contents.R.Reg; } - void setReg(unsigned Val) { - Contents.RegNum = Val; + unsigned getSubReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.R.Sub; } - int64_t getImm() const { + unsigned getImm() const { assert(isImm() && "Wrong CountValue accessor"); - if (isNegative) { - return -Contents.ImmVal; - } return Contents.ImmVal; } - void setImm(int64_t Val) { - Contents.ImmVal = Val; - } void print(raw_ostream &OS, const TargetMachine *TM = 0) const { - if (isReg()) { OS << PrintReg(getReg()); } - if (isImm()) { OS << getImm(); } - } - }; - - struct HexagonFixupHwLoops : public MachineFunctionPass { - public: - static char ID; // Pass identification, replacement for typeid. - - HexagonFixupHwLoops() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); + const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0; + if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } + if (isImm()) { OS << Contents.ImmVal; } } - - private: - /// Maximum distance between the loop instr and the basic block. - /// Just an estimate. - static const unsigned MAX_LOOP_DISTANCE = 200; - - /// fixupLoopInstrs - Check the offset between each loop instruction and - /// the loop basic block to determine if we can use the LOOP instruction - /// or if we need to set the LC/SA registers explicitly. - bool fixupLoopInstrs(MachineFunction &MF); - - /// convertLoopInstr - Add the instruction to set the LC and SA registers - /// explicitly. - void convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS); - }; +} // end anonymous namespace - char HexagonFixupHwLoops::ID = 0; -} // end anonymous namespace +INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) -/// isHardwareLoop - Returns true if the instruction is a hardware loop -/// instruction. +/// \brief Returns true if the instruction is a hardware loop instruction. 
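+/// Only LOOP0_r and LOOP0_i are matched here; the pass itself only ever
+/// generates the LOOP0 form.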
static bool isHardwareLoop(const MachineInstr *MI) { return MI->getOpcode() == Hexagon::LOOP0_r || MI->getOpcode() == Hexagon::LOOP0_i; } -/// isCompareEquals - Returns true if the instruction is a compare equals -/// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI) { - return MI->getOpcode() == Hexagon::CMPEQri; -} - - -/// createHexagonHardwareLoops - Factory for creating -/// the hardware loop phase. FunctionPass *llvm::createHexagonHardwareLoops() { return new HexagonHardwareLoops(); } @@ -224,45 +298,149 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - // get the loop information MLI = &getAnalysis(); - // get the register information MRI = &MF.getRegInfo(); - // the target specific instructio info. - TII = MF.getTarget().getInstrInfo(); + MDT = &getAnalysis(); + TM = static_cast(&MF.getTarget()); + TII = static_cast(TM->getInstrInfo()); + TRI = static_cast(TM->getRegisterInfo()); for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) { MachineLoop *L = *I; - if (!L->getParentLoop()) { + if (!L->getParentLoop()) Changed |= convertToHardwareLoop(L); - } } return Changed; } -/// getCanonicalInductionVariable - Check to see if the loop has a canonical -/// induction variable. We check for a simple recurrence pattern - an -/// integer recurrence that decrements by one each time through the loop and -/// ends at zero. If so, return the phi node that corresponds to it. -/// -/// Based upon the similar code in LoopInfo except this code is specific to -/// the machine. -/// This method assumes that the IndVarSimplify pass has been run by 'opt'. + +bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, + unsigned &Reg, + int64_t &IVBump, + MachineInstr *&IVOp + ) const { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + if (!Header || !Preheader || !Latch) + return false; + + // This pair represents an induction register together with an immediate + // value that will be added to it in each loop iteration. + typedef std::pair RegisterBump; + + // Mapping: R.next -> (R, bump), where R, R.next and bump are derived + // from an induction operation + // R.next = R + bump + // where bump is an immediate value. + typedef std::map InductionMap; + + InductionMap IndMap; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. Get the operand that corresponds to the + // latch block, and see if is a result of an addition of form "reg+imm", + // where the "reg" is defined by the PHI node we are looking at. + for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiOpReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiOpReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::ADD_ri); + + if (isAdd) { + // If the register operand to the add is the PHI we're + // looking at, this meets the induction pattern. 
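+        // i.e. the add must be of the form
+        //   R.next = ADD_ri R, #V
+        // where R is the result of this very PHI.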
+ unsigned IndReg = DI->getOperand(1).getReg(); + if (MRI->getVRegDef(IndReg) == Phi) { + unsigned UpdReg = DI->getOperand(0).getReg(); + int64_t V = DI->getOperand(2).getImm(); + IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); + } + } + } // for (i) + } // for (instr) + + SmallVector Cond; + MachineBasicBlock *TB = 0, *FB = 0; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return false; + + unsigned CSz = Cond.size(); + assert (CSz == 1 || CSz == 2); + unsigned PredR = Cond[CSz-1].getReg(); + + MachineInstr *PredI = MRI->getVRegDef(PredR); + if (!PredI->isCompare()) + return false; + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int CmpImm = 0, CmpMask = 0; + bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2, + CmpMask, CmpImm); + // Fail if the compare was not analyzed, or it's not comparing a register + // with an immediate value. Not checking the mask here, since we handle + // the individual compare opcodes (including CMPb) later on. + if (!CmpAnalyzed) + return false; + + // Exactly one of the input registers to the comparison should be among + // the induction registers. + InductionMap::iterator IndMapEnd = IndMap.end(); + InductionMap::iterator F = IndMapEnd; + if (CmpReg1 != 0) { + InductionMap::iterator F1 = IndMap.find(CmpReg1); + if (F1 != IndMapEnd) + F = F1; + } + if (CmpReg2 != 0) { + InductionMap::iterator F2 = IndMap.find(CmpReg2); + if (F2 != IndMapEnd) { + if (F != IndMapEnd) + return false; + F = F2; + } + } + if (F == IndMapEnd) + return false; + + Reg = F->second.first; + IVBump = F->second.second; + IVOp = MRI->getVRegDef(F->first); + return true; +} + + +/// \brief Analyze the statements in a loop to determine if the loop has +/// a computable trip count and, if so, return a value that represents +/// the trip count expression. /// -const MachineInstr -*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const { +/// This function iterates over the phi nodes in the loop to check for +/// induction variable patterns that are used in the calculation for +/// the number of time the loop is executed. +CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, + SmallVector &OldInsts) { MachineBasicBlock *TopMBB = L->getTopBlock(); MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); assert(PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!"); MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return 0; // dead loop + if (PI == TopMBB->pred_end()) // dead loop? + return 0; MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return 0; // multiple backedges? + if (PI != TopMBB->pred_end()) // multiple backedges? + return 0; - // make sure there is one incoming and one backedge and determine which + // Make sure there is one incoming and one backedge and determine which // is which. if (L->contains(Incoming)) { if (L->contains(Backedge)) @@ -271,139 +449,433 @@ const MachineInstr } else if (!L->contains(Backedge)) return 0; - // Loop over all of the PHI nodes, looking for a canonical induction variable: - // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". - // - The recurrence comes from the backedge. 
- // - the definition is an induction operatio.n - for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); - I != E && I->isPHI(); ++I) { - const MachineInstr *MPhi = &*I; - unsigned DefReg = MPhi->getOperand(0).getReg(); - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - // Check each operand for the value from the backedge. - MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); - if (L->contains(MBB)) { // operands comes from the backedge - // Check if the definition is an induction operation. - const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); - if (isInductionOperation(DI, DefReg)) { - return MPhi; - } - } + // Look for the cmp instruction to determine if we can get a useful trip + // count. The trip count can be either a register or an immediate. The + // location of the value depends upon the type (reg or imm). + MachineBasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return 0; + + unsigned IVReg = 0; + int64_t IVBump = 0; + MachineInstr *IVOp; + bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); + if (!FoundIV) + return 0; + + MachineBasicBlock *Preheader = L->getLoopPreheader(); + + MachineOperand *InitialValue = 0; + MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); + for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { + MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); + if (MBB == Preheader) + InitialValue = &IV_Phi->getOperand(i); + else if (MBB == Latch) + IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. + } + if (!InitialValue) + return 0; + + SmallVector Cond; + MachineBasicBlock *TB = 0, *FB = 0; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return 0; + + MachineBasicBlock *Header = L->getHeader(); + // TB must be non-null. If FB is also non-null, one of them must be + // the header. Otherwise, branch to TB could be exiting the loop, and + // the fall through can go to the header. + assert (TB && "Latch block without a branch?"); + assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); + if (!TB || (FB && TB != Header && FB != Header)) + return 0; + + // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch + // to put imm(0), followed by P in the vector Cond. + // If TB is not the header, it means that the "not-taken" path must lead + // to the header. + bool Negated = (Cond.size() > 1) ^ (TB != Header); + unsigned PredReg = Cond[Cond.size()-1].getReg(); + MachineInstr *CondI = MRI->getVRegDef(PredReg); + unsigned CondOpc = CondI->getOpcode(); + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int Mask = 0, ImmValue = 0; + bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, + Mask, ImmValue); + if (!AnalyzedCmp) + return 0; + + // The comparison operator type determines how we compute the loop + // trip count. + OldInsts.push_back(CondI); + OldInsts.push_back(IVOp); + + // Sadly, the following code gets information based on the position + // of the operands in the compare instruction. This has to be done + // this way, because the comparisons check for a specific relationship + // between the operands (e.g. is-less-than), rather than to find out + // what relationship the operands are in (as on PPC). 
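+  // Concretely: whether the induction register appears as the first or
+  // the second operand decides whether the recorded Comparison::Kind
+  // must later be swapped (see isSwapped below).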
+ Comparison::Kind Cmp; + bool isSwapped = false; + const MachineOperand &Op1 = CondI->getOperand(1); + const MachineOperand &Op2 = CondI->getOperand(2); + const MachineOperand *EndValue = 0; + + if (Op1.isReg()) { + if (Op2.isImm() || Op1.getReg() == IVReg) + EndValue = &Op2; + else { + EndValue = &Op1; + isSwapped = true; } } - return 0; -} -/// getTripCount - Return a loop-invariant LLVM value indicating the -/// number of times the loop will be executed. The trip count can -/// be either a register or a constant value. If the trip-count -/// cannot be determined, this returns null. -/// -/// We find the trip count from the phi instruction that defines the -/// induction variable. We follow the links to the CMP instruction -/// to get the trip count. -/// -/// Based upon getTripCount in LoopInfo. -/// -CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { - // Check that the loop has a induction variable. - const MachineInstr *IV_Inst = getCanonicalInductionVariable(L); - if (IV_Inst == 0) return 0; - - // Canonical loops will end with a 'cmpeq_ri IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - const MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } - - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). - for (MachineRegisterInfo::reg_iterator - RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); - RI != RE; ++RI) { - IV_Opnd = &RI.getOperand(); - const MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI)) { - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be 0"); - int64_t ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); - - if (ImmVal == 0) { - // Make sure the induction variable changes by one on each iteration. - if (iv_value != 1 && iv_value != -1) { + if (!EndValue) + return 0; + + switch (CondOpc) { + case Hexagon::CMPEQri: + case Hexagon::CMPEQrr: + Cmp = !Negated ? Comparison::EQ : Comparison::NE; + break; + case Hexagon::CMPLTrr: + Cmp = !Negated ? Comparison::LTs : Comparison::GEs; + break; + case Hexagon::CMPLTUrr: + Cmp = !Negated ? Comparison::LTu : Comparison::GEu; + break; + case Hexagon::CMPGTUri: + case Hexagon::CMPGTUrr: + Cmp = !Negated ? Comparison::GTu : Comparison::LEu; + break; + case Hexagon::CMPGTri: + case Hexagon::CMPGTrr: + Cmp = !Negated ? Comparison::GTs : Comparison::LEs; + break; + // Very limited support for byte/halfword compares. + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPhEQri_V4: { + if (IVBump != 1) + return 0; + + int64_t InitV, EndV; + // Since the comparisons are "ri", the EndValue should be an + // immediate. Check it just in case. + assert(EndValue->isImm() && "Unrecognized latch comparison"); + EndV = EndValue->getImm(); + // Allow InitialValue to be a register defined with an immediate. 
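+    // (i.e. a register whose definition is one of the transfer-immediate
+    // instructions recognized by defWithImmediate).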
+ if (InitialValue->isReg()) { + if (!defWithImmediate(InitialValue->getReg())) return 0; - } - return new CountValue(InitialValue->getReg(), iv_value > 0); + InitV = getImmediate(*InitialValue); } else { - assert(InitialValue->isReg() && "Expecting register for init value"); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); - if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) { - int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); - if ((count % iv_value) != 0) { - return 0; - } - return new CountValue(count/iv_value); - } + assert(InitialValue->isImm()); + InitV = InitialValue->getImm(); + } + if (InitV >= EndV) + return 0; + if (CondOpc == Hexagon::CMPbEQri_V4) { + if (!isInt<8>(InitV) || !isInt<8>(EndV)) + return 0; + } else { // Hexagon::CMPhEQri_V4 + if (!isInt<16>(InitV) || !isInt<16>(EndV)) + return 0; } + Cmp = !Negated ? Comparison::EQ : Comparison::NE; + break; } + default: + return 0; } - return 0; + + if (isSwapped) + Cmp = Comparison::getSwappedComparison(Cmp); + + if (InitialValue->isReg()) { + unsigned R = InitialValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return 0; + OldInsts.push_back(MRI->getVRegDef(R)); + } + if (EndValue->isReg()) { + unsigned R = EndValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return 0; + } + + return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); } -/// isInductionOperation - return true if the operation is matches the -/// pattern that defines an induction variable: -/// add iv, c -/// -bool -HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI, - unsigned IVReg) const { - return (MI->getOpcode() == - Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg); +/// \brief Helper function that returns the expression that represents the +/// number of times a loop iterates. The function takes the operands that +/// represent the loop start value, loop end value, and induction value. +/// Based upon these operands, the function attempts to compute the trip count. +CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const { + // Cannot handle comparison EQ, i.e. while (A == B). + if (Cmp == Comparison::EQ) + return 0; + + // Check if either the start or end values are an assignment of an immediate. + // If so, use the immediate value rather than the register. + if (Start->isReg()) { + const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); + if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI) + Start = &StartValInstr->getOperand(1); + } + if (End->isReg()) { + const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI) + End = &EndValInstr->getOperand(1); + } + + assert (Start->isReg() || Start->isImm()); + assert (End->isReg() || End->isImm()); + + bool CmpLess = Cmp & Comparison::L; + bool CmpGreater = Cmp & Comparison::G; + bool CmpHasEqual = Cmp & Comparison::EQ; + + // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. + // If loop executes while iv is "less" with the iv value going down, then + // the iv must wrap. + if (CmpLess && IVBump < 0) + return 0; + // If loop executes while iv is "greater" with the iv value going up, then + // the iv must wrap. 
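+  // E.g. "for (i = N; i > 0; i += 1)" with N > 0 can only terminate by
+  // wrapping around, so it is not treated as a counted loop.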
+ if (CmpGreater && IVBump > 0) + return 0; + + if (Start->isImm() && End->isImm()) { + // Both, start and end are immediates. + int64_t StartV = Start->getImm(); + int64_t EndV = End->getImm(); + int64_t Dist = EndV - StartV; + if (Dist == 0) + return 0; + + bool Exact = (Dist % IVBump) == 0; + + if (Cmp == Comparison::NE) { + if (!Exact) + return 0; + if ((Dist < 0) ^ (IVBump < 0)) + return 0; + } + + // For comparisons that include the final value (i.e. include equality + // with the final value), we need to increase the distance by 1. + if (CmpHasEqual) + Dist = Dist > 0 ? Dist+1 : Dist-1; + + // assert (CmpLess => Dist > 0); + assert ((!CmpLess || Dist > 0) && "Loop should never iterate!"); + // assert (CmpGreater => Dist < 0); + assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!"); + + // "Normalized" distance, i.e. with the bump set to +-1. + int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump + : (-Dist + (-IVBump-1)) / (-IVBump); + assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); + + uint64_t Count = Dist1; + + if (Count > 0xFFFFFFFFULL) + return 0; + + return new CountValue(CountValue::CV_Immediate, Count); + } + + // A general case: Start and End are some values, but the actual + // iteration count may not be available. If it is not, insert + // a computation of it into the preheader. + + // If the induction variable bump is not a power of 2, quit. + // Othwerise we'd need a general integer division. + if (!isPowerOf2_64(abs(IVBump))) + return 0; + + MachineBasicBlock *PH = Loop->getLoopPreheader(); + assert (PH && "Should have a preheader by now"); + MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); + DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc() + : DebugLoc(); + + // If Start is an immediate and End is a register, the trip count + // will be "reg - imm". Hexagon's "subtract immediate" instruction + // is actually "reg + -imm". + + // If the loop IV is going downwards, i.e. if the bump is negative, + // then the iteration count (computed as End-Start) will need to be + // negated. To avoid the negation, just swap Start and End. + if (IVBump < 0) { + std::swap(Start, End); + IVBump = -IVBump; + } + // Cmp may now have a wrong direction, e.g. LEs may now be GEs. + // Signedness, and "including equality" are preserved. + + bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) + bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) + + int64_t StartV = 0, EndV = 0; + if (Start->isImm()) + StartV = Start->getImm(); + if (End->isImm()) + EndV = End->getImm(); + + int64_t AdjV = 0; + // To compute the iteration count, we would need this computation: + // Count = (End - Start + (IVBump-1)) / IVBump + // or, when CmpHasEqual: + // Count = (End - Start + (IVBump-1)+1) / IVBump + // The "IVBump-1" part is the adjustment (AdjV). We can avoid + // generating an instruction specifically to add it if we can adjust + // the immediate values for Start or End. + + if (CmpHasEqual) { + // Need to add 1 to the total iteration count. 
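+    // Fold that +1 into Start or End when either is an immediate
+    // (decrement StartV or increment EndV), so no extra add needs to be
+    // emitted; otherwise accumulate it in AdjV.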
+ if (Start->isImm()) + StartV--; + else if (End->isImm()) + EndV++; + else + AdjV += 1; + } + + if (Cmp != Comparison::NE) { + if (Start->isImm()) + StartV -= (IVBump-1); + else if (End->isImm()) + EndV += (IVBump-1); + else + AdjV += (IVBump-1); + } + + unsigned R = 0, SR = 0; + if (Start->isReg()) { + R = Start->getReg(); + SR = Start->getSubReg(); + } else { + R = End->getReg(); + SR = End->getSubReg(); + } + const TargetRegisterClass *RC = MRI->getRegClass(R); + // Hardware loops cannot handle 64-bit registers. If it's a double + // register, it has to have a subregister. + if (!SR && RC == &Hexagon::DoubleRegsRegClass) + return 0; + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + + // Compute DistR (register with the distance between Start and End). + unsigned DistR, DistSR; + + // Avoid special case, where the start value is an imm(0). + if (Start->isImm() && StartV == 0) { + DistR = End->getReg(); + DistSR = End->getSubReg(); + } else { + const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) : + (RegToImm ? TII->get(Hexagon::SUB_ri) : + TII->get(Hexagon::ADD_ri)); + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + + if (RegToReg) { + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addReg(Start->getReg(), 0, Start->getSubReg()); + } else if (RegToImm) { + SubIB.addImm(EndV) + .addReg(Start->getReg(), 0, Start->getSubReg()); + } else { // ImmToReg + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addImm(-StartV); + } + DistR = SubR; + DistSR = 0; + } + + // From DistR, compute AdjR (register with the adjusted distance). + unsigned AdjR, AdjSR; + + if (AdjV == 0) { + AdjR = DistR; + AdjSR = DistSR; + } else { + // Generate CountR = ADD DistR, AdjVal + unsigned AddR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); + BuildMI(*PH, InsertPos, DL, AddD, AddR) + .addReg(DistR, 0, DistSR) + .addImm(AdjV); + + AdjR = AddR; + AdjSR = 0; + } + + // From AdjR, compute CountR (register with the final count). + unsigned CountR, CountSR; + + if (IVBump == 1) { + CountR = AdjR; + CountSR = AdjSR; + } else { + // The IV bump is a power of two. Log_2(IV bump) is the shift amount. + unsigned Shift = Log2_32(IVBump); + + // Generate NormR = LSR DistR, Shift. + unsigned LsrR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri); + BuildMI(*PH, InsertPos, DL, LsrD, LsrR) + .addReg(AdjR, 0, AdjSR) + .addImm(Shift); + + CountR = LsrR; + CountSR = 0; + } + + return new CountValue(CountValue::CV_Register, CountR, CountSR); } -/// isInvalidOperation - Return true if the operation is invalid within -/// hardware loop. -bool -HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const { + +/// \brief Return true if the operation is invalid within hardware loop. 
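+/// Calls are rejected (the callee may itself use a hardware loop), as are
+/// nested hardware loop instructions and any instruction that defines one
+/// of the loop registers LC0, LC1, SA0 or SA1.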
+bool HexagonHardwareLoops::isInvalidLoopOperation( + const MachineInstr *MI) const { // call is not allowed because the callee may use a hardware loop - if (MI->getDesc().isCall()) { + if (MI->getDesc().isCall()) return true; - } + // do not allow nested hardware loops - if (isHardwareLoop(MI)) { + if (isHardwareLoop(MI)) return true; - } + // check if the instruction defines a hardware loop register for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && - (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 || - MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (R == Hexagon::LC0 || R == Hexagon::LC1 || + R == Hexagon::SA0 || R == Hexagon::SA1) return true; - } } return false; } -/// containsInvalidInstruction - Return true if the loop contains -/// an instruction that inhibits the use of the hardware loop function. -/// + +/// \brief - Return true if the loop contains an instruction that inhibits +/// the use of the hardware loop function. bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { const std::vector Blocks = L->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { @@ -411,58 +883,184 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { const MachineInstr *MI = &*MII; - if (isInvalidLoopOperation(MI)) { + if (isInvalidLoopOperation(MI)) return true; - } } } return false; } -/// converToHardwareLoop - check if the loop is a candidate for -/// converting to a hardware loop. If so, then perform the -/// transformation. + +/// \brief Returns true if the instruction is dead. This was essentially +/// copied from DeadMachineInstructionElim::isDead, but with special cases +/// for inline asm, physical registers and instructions with side effects +/// removed. +bool HexagonHardwareLoops::isDead(const MachineInstr *MI, + SmallVector &DeadPhis) const { + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned Reg = MO.getReg(); + if (MRI->use_nodbg_empty(Reg)) + continue; + + typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; + + // This instruction has users, but if the only user is the phi node for the + // parent block, and the only use of that phi node is this instruction, then + // this instruction is dead: both it (and the phi node) can be removed. + use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + use_nodbg_iterator End = MRI->use_nodbg_end(); + if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI()) + return false; + + MachineInstr *OnePhi = I.getOperand().getParent(); + for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { + const MachineOperand &OPO = OnePhi->getOperand(j); + if (!OPO.isReg() || !OPO.isDef()) + continue; + + unsigned OPReg = OPO.getReg(); + use_nodbg_iterator nextJ; + for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); + J != End; J = nextJ) { + nextJ = llvm::next(J); + MachineOperand &Use = J.getOperand(); + MachineInstr *UseMI = Use.getParent(); + + // If the phi node has a user that is not MI, bail... + if (MI != UseMI) + return false; + } + } + DeadPhis.push_back(OnePhi); + } + + // If there are no defs with uses, the instruction is dead. 
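+  // (Each def either had no non-debug uses, or its only use was a PHI
+  // whose values feed back exclusively into MI; such PHIs were recorded
+  // in DeadPhis so the caller can delete them as well.)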
+ return true; +} + +void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { + // This procedure was essentially copied from DeadMachineInstructionElim. + + SmallVector DeadPhis; + if (isDead(MI, DeadPhis)) { + DEBUG(dbgs() << "HW looping will remove: " << *MI); + + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I != E; I = nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand &Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI == MI) + continue; + if (Use.isDebug()) + UseMI->getOperand(0).setReg(0U); + // This may also be a "instr -> phi -> instr" case which can + // be removed too. + } + } + + MI->eraseFromParent(); + for (unsigned i = 0; i < DeadPhis.size(); ++i) + DeadPhis[i]->eraseFromParent(); + } +} + +/// \brief Check if the loop is a candidate for converting to a hardware +/// loop. If so, then perform the transformation. /// -/// This function works on innermost loops first. A loop can -/// be converted if it is a counting loop; either a register -/// value or an immediate. +/// This function works on innermost loops first. A loop can be converted +/// if it is a counting loop; either a register value or an immediate. /// -/// The code makes several assumptions about the representation -/// of the loop in llvm. +/// The code makes several assumptions about the representation of the loop +/// in llvm. bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { + // This is just for sanity. + assert(L->getHeader() && "Loop without a header?"); + bool Changed = false; // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) Changed |= convertToHardwareLoop(*I); - } + // If a nested loop has been converted, then we can't convert this loop. - if (Changed) { + if (Changed) return Changed; + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = HWLoopLimit; + if (Limit >= 0) { + if (Counter >= HWLoopLimit) + return false; + Counter++; } - // Are we able to determine the trip count for the loop? - CountValue *TripCount = getTripCount(L); - if (TripCount == 0) { - return false; - } +#endif + // Does the loop contain any invalid instructions? - if (containsInvalidInstruction(L)) { + if (containsInvalidInstruction(L)) return false; - } - MachineBasicBlock *Preheader = L->getLoopPreheader(); - // No preheader means there's not place for the loop instr. - if (Preheader == 0) { + + // Is the induction variable bump feeding the latch condition? + if (!fixupInductionVariable(L)) return false; - } - MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); MachineBasicBlock *LastMBB = L->getExitingBlock(); // Don't generate hw loop if the loop has more than one exit. 
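  // (getExitingBlock() returns null unless the loop has exactly one
  // exiting block.)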
- if (LastMBB == 0) { + if (LastMBB == 0) return false; - } + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - if (LastI == LastMBB->end()) { + if (LastI == LastMBB->end()) return false; + + // Ensure the loop has a preheader: the loop instruction will be + // placed there. + bool NewPreheader = false; + MachineBasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = createPreheaderForLoop(L); + if (!Preheader) + return false; + NewPreheader = true; + } + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + + SmallVector OldInsts; + // Are we able to determine the trip count for the loop? + CountValue *TripCount = getLoopTripCount(L, OldInsts); + if (TripCount == 0) + return false; + + // Is the trip count available in the preheader? + if (TripCount->isReg()) { + // There will be a use of the register inserted into the preheader, + // so make sure that the register is actually defined at that point. + MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); + MachineBasicBlock *BBDef = TCDef->getParent(); + if (!NewPreheader) { + if (!MDT->dominates(BBDef, Preheader)) + return false; + } else { + // If we have just created a preheader, the dominator tree won't be + // aware of it. Check if the definition of the register dominates + // the header, but is not the header itself. + if (!MDT->properlyDominates(BBDef, L->getHeader())) + return false; + } } // Determine the loop start. @@ -470,53 +1068,53 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { if (L->getLoopLatch() != LastMBB) { // When the exit and latch are not the same, use the latch block as the // start. - // The loop start address is used only after the 1st iteration, and the loop - // latch may contains instrs. that need to be executed after the 1st iter. + // The loop start address is used only after the 1st iteration, and the + // loop latch may contains instrs. that need to be executed after the + // first iteration. LoopStart = L->getLoopLatch(); // Make sure the latch is a successor of the exit, otherwise it won't work. - if (!LastMBB->isSuccessor(LoopStart)) { + if (!LastMBB->isSuccessor(LoopStart)) return false; - } } - // Convert the loop to a hardware loop + // Convert the loop to a hardware loop. DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); - DebugLoc InsertPosDL; + DebugLoc DL; if (InsertPos != Preheader->end()) - InsertPosDL = InsertPos->getDebugLoc(); + DL = InsertPos->getDebugLoc(); if (TripCount->isReg()) { // Create a copy of the loop count register. - MachineFunction *MF = LastMBB->getParent(); - const TargetRegisterClass *RC = - MF->getRegInfo().getRegClass(TripCount->getReg()); - unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); - if (TripCount->isNeg()) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); - } - + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) + .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); // Add the Loop instruction to the beginning of the loop. 
- BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + .addMBB(LoopStart) + .addReg(CountReg); } else { - assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); - // Add the Loop immediate instruction to the beginning of the loop. + assert(TripCount->isImm() && "Expecting immediate value for trip count"); + // Add the Loop immediate instruction to the beginning of the loop, + // if the immediate fits in the instructions. Otherwise, we need to + // create a new virtual register. int64_t CountImm = TripCount->getImm(); - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm); + if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) { + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg) + .addImm(CountImm); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + .addMBB(LoopStart).addReg(CountReg); + } else + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i)) + .addMBB(LoopStart).addImm(CountImm); } - // Make sure the loop start always has a reference in the CFG. We need to - // create a BlockAddress operand to get this mechanism to work both the + // Make sure the loop start always has a reference in the CFG. We need + // to create a BlockAddress operand to get this mechanism to work both the // MachineBasicBlock and BasicBlock objects need the flag set. LoopStart->setHasAddressTaken(); // This line is needed to set the hasAddressTaken flag on the BasicBlock - // object + // object. BlockAddress::get(const_cast(LoopStart->getBasicBlock())); // Replace the loop branch with an endloop instruction. @@ -529,13 +1127,12 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // - a conditional branch to the loop start. if (LastI->getOpcode() == Hexagon::JMP_c || LastI->getOpcode() == Hexagon::JMP_cNot) { - // delete one and change/add an uncond. branch to out of the loop + // Delete one and change/add an uncond. branch to out of the loop. MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); if (!L->contains(BranchTarget)) { - if (LastI != LastMBB->end()) { - TII->RemoveBranch(*LastMBB); - } + if (LastI != LastMBB->end()) + LastI = LastMBB->erase(LastI); SmallVector Cond; TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL); } @@ -545,110 +1142,414 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { } delete TripCount; + // The induction operation and the comparison may now be + // unneeded. If these are unneeded, then remove them. + for (unsigned i = 0; i < OldInsts.size(); ++i) + removeIfDead(OldInsts[i]); + ++NumHWLoops; return true; } -/// createHexagonFixupHwLoops - Factory for creating the hardware loop -/// phase. -FunctionPass *llvm::createHexagonFixupHwLoops() { - return new HexagonFixupHwLoops(); + +bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, + MachineInstr *CmpI) { + assert (BumpI != CmpI && "Bump and compare in the same instruction?"); + + MachineBasicBlock *BB = BumpI->getParent(); + if (CmpI->getParent() != BB) + return false; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + // Check if things are in order to begin with. + for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I) + if (&*I == CmpI) + return true; + + // Out of order. 
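+  // The compare currently precedes the bump. Walk forward from the
+  // compare; if we reach the bump without seeing a use of the predicate,
+  // it is safe to splice the compare to just after the bump.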
+ unsigned PredR = CmpI->getOperand(0).getReg(); + bool FoundBump = false; + instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt); + for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { + MachineInstr *In = &*I; + for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { + MachineOperand &MO = In->getOperand(i); + if (MO.isReg() && MO.isUse()) { + if (MO.getReg() == PredR) // Found an intervening use of PredR. + return false; + } + } + + if (In == BumpI) { + instr_iterator After = BumpI; + instr_iterator From = CmpI; + BB->splice(llvm::next(After), BB, From); + FoundBump = true; + break; + } + } + assert (FoundBump && "Cannot determine instruction order"); + return FoundBump; } -bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n"); - bool Changed = fixupLoopInstrs(MF); - return Changed; +MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { + MachineInstr *DI = MRI->getVRegDef(R); + unsigned DOpc = DI->getOpcode(); + switch (DOpc) { + case Hexagon::TFRI: + case Hexagon::TFRI64: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return DI; + } + return 0; } -/// fixupLoopInsts - For Hexagon, if the loop label is to far from the -/// loop instruction then we need to set the LC0 and SA0 registers -/// explicitly instead of using LOOP(start,count). This function -/// checks the distance, and generates register assignments if needed. -/// -/// This function makes two passes over the basic blocks. The first -/// pass computes the offset of the basic block from the start. -/// The second pass checks all the loop instructions. -bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { - - // Offset of the current instruction from the start. - unsigned InstOffset = 0; - // Map for each basic block to it's first instruction. - DenseMap BlockToInstOffset; - - // First pass - compute the offset of each basic block. - for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - BlockToInstOffset[MBB] = InstOffset; - InstOffset += (MBB->size() * 4); - } - - // Second pass - check each loop instruction to see if it needs to - // be converted. - InstOffset = 0; - bool Changed = false; - RegScavenger RS; - - // Loop over all the basic blocks. - for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - InstOffset = BlockToInstOffset[MBB]; - RS.enterBasicBlock(MBB); - - // Loop over all the instructions. - MachineBasicBlock::iterator MIE = MBB->end(); - MachineBasicBlock::iterator MII = MBB->begin(); - while (MII != MIE) { - if (isHardwareLoop(MII)) { - RS.forward(MII); - assert(MII->getOperand(0).isMBB() && - "Expect a basic block as loop operand"); - int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; - diff = (diff > 0 ? diff : -diff); - if ((unsigned)diff > MAX_LOOP_DISTANCE) { - // Convert to explicity setting LC0 and SA0. - convertLoopInstr(MF, MII, RS); - MII = MBB->erase(MII); - Changed = true; - } else { - ++MII; + +int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) { + if (MO.isImm()) + return MO.getImm(); + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = defWithImmediate(R); + assert(DI && "Need an immediate operand"); + // All currently supported "define-with-immediate" instructions have the + // actual immediate value in the operand(1). 
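+  // (TFRI, TFRI64, CONST32_Int_Real and CONST64_Int_Real all follow
+  // this operand layout; see defWithImmediate above.)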
+ int64_t v = DI->getOperand(1).getImm(); + return v; +} + + +void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { + if (MO.isImm()) { + MO.setImm(Val); + return; + } + + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = defWithImmediate(R); + if (MRI->hasOneNonDBGUse(R)) { + // If R has only one use, then just change its defining instruction to + // the new immediate value. + DI->getOperand(1).setImm(Val); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(R); + unsigned NewR = MRI->createVirtualRegister(RC); + MachineBasicBlock &B = *DI->getParent(); + DebugLoc DL = DI->getDebugLoc(); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) + .addImm(Val); + MO.setReg(NewR); +} + + +bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + + if (!Header || !Preheader || !Latch) + return false; + + // These data structures follow the same concept as the corresponding + // ones in findInductionRegister (where some comments are). + typedef std::pair RegisterBump; + typedef std::pair RegisterInduction; + typedef std::set RegisterInductionSet; + + // Register candidates for induction variables, with their associated bumps. + RegisterInductionSet IndRegs; + + // Look for induction patterns: + // vreg1 = PHI ..., [ latch, vreg2 ] + // vreg2 = ADD vreg1, imm + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. + for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::ADD_ri); + + if (isAdd) { + // If the register operand to the add/sub is the PHI we are looking + // at, this meets the induction pattern. + unsigned IndReg = DI->getOperand(1).getReg(); + if (MRI->getVRegDef(IndReg) == Phi) { + unsigned UpdReg = DI->getOperand(0).getReg(); + int64_t V = DI->getOperand(2).getImm(); + IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } - } else { - ++MII; } - InstOffset += 4; + } // for (i) + } // for (instr) + + if (IndRegs.empty()) + return false; + + MachineBasicBlock *TB = 0, *FB = 0; + SmallVector Cond; + // AnalyzeBranch returns true if it fails to analyze branch. + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return false; + + // Check if the latch branch is unconditional. + if (Cond.empty()) + return false; + + if (TB != Header && FB != Header) + // The latch does not go back to the header. Not a latch we know and love. + return false; + + // Expecting a predicate register as a condition. It won't be a hardware + // predicate register at this point yet, just a vreg. + // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) + // into Cond, followed by the predicate register. For non-negated branches + // it's just the register. 
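+  // Hence Cond.size() is either 1 or 2, and the predicate register is
+  // always the last element.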
+ unsigned CSz = Cond.size(); + if (CSz != 1 && CSz != 2) + return false; + + unsigned P = Cond[CSz-1].getReg(); + MachineInstr *PredDef = MRI->getVRegDef(P); + + if (!PredDef->isCompare()) + return false; + + SmallSet CmpRegs; + MachineOperand *CmpImmOp = 0; + + // Go over all operands to the compare and look for immediate and register + // operands. Assume that if the compare has a single register use and a + // single immediate operand, then the register is being compared with the + // immediate value. + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg()) { + // Skip all implicit references. In one case there was: + // %vreg140 = FCMPUGT32_rr %vreg138, %vreg139, %USR + if (MO.isImplicit()) + continue; + if (MO.isUse()) { + unsigned R = MO.getReg(); + if (!defWithImmediate(R)) { + CmpRegs.insert(MO.getReg()); + continue; + } + // Consider the register to be the "immediate" operand. + if (CmpImmOp) + return false; + CmpImmOp = &MO; + } + } else if (MO.isImm()) { + if (CmpImmOp) // A second immediate argument? Confusing. Bail out. + return false; + CmpImmOp = &MO; } } - return Changed; + if (CmpRegs.empty()) + return false; + + // Check if the compared register follows the order we want. Fix if needed. + for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); + I != E; ++I) { + // This is a success. If the register used in the comparison is one that + // we have identified as a bumped (updated) induction register, there is + // nothing to do. + if (CmpRegs.count(I->first)) + return true; + + // Otherwise, if the register being compared comes out of a PHI node, + // and has been recognized as following the induction pattern, and is + // compared against an immediate, we can fix it. + const RegisterBump &RB = I->second; + if (CmpRegs.count(RB.first)) { + if (!CmpImmOp) + return false; + + int64_t CmpImm = getImmediate(*CmpImmOp); + int64_t V = RB.second; + if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). + return false; + if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). + return false; + CmpImm += V; + // Some forms of cmp-immediate allow u9 and s10. Assume the worst case + // scenario, i.e. an 8-bit value. + if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) + return false; + + // Make sure that the compare happens after the bump. Otherwise, + // after the fixup, the compare would use a yet-undefined register. + MachineInstr *BumpI = MRI->getVRegDef(I->first); + bool Order = orderBumpCompare(BumpI, PredDef); + if (!Order) + return false; + + // Finally, fix the compare instruction. + setImmediate(*CmpImmOp, CmpImm); + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + MO.setReg(I->first); + return true; + } + } + } + } + return false; } -/// convertLoopInstr - convert a loop instruction to a sequence of instructions -/// that set the lc and sa register explicitly. -void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS) { - const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); - MachineBasicBlock *MBB = MII->getParent(); - DebugLoc DL = MII->getDebugLoc(); - unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); - - // First, set the LC0 with the trip count. 
- if (MII->getOperand(1).isReg()) { - // Trip count is a register - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) - .addReg(MII->getOperand(1).getReg()); + +/// \brief Create a preheader for a given loop. +MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( + MachineLoop *L) { + if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) + return TmpPH; + + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineFunction *MF = Header->getParent(); + DebugLoc DL; + + if (!Latch || Header->hasAddressTaken()) + return 0; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + typedef MachineBasicBlock::pred_iterator pred_iterator; + + // Verify that all existing predecessors have analyzable branches + // (or no branches at all). + typedef std::vector MBBVector; + MBBVector Preds(Header->pred_begin(), Header->pred_end()); + SmallVector Tmp1; + MachineBasicBlock *TB = 0, *FB = 0; + + if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) + return 0; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + if (PB != Latch) { + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + if (NotAnalyzed) + return 0; + } + } + + MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); + MF->insert(Header, NewPH); + + if (Header->pred_size() > 2) { + // Ensure that the header has only two predecessors: the preheader and + // the loop latch. Any additional predecessors of the header should + // join at the newly created preheader. Inspect all PHI nodes from the + // header and create appropriate corresponding PHI nodes in the preheader. + + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + + const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); + MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); + NewPH->insert(NewPH->end(), NewPN); + + unsigned PR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(PR); + unsigned NewPR = MRI->createVirtualRegister(RC); + NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); + + // Copy all non-latch operands of a header's PHI node to the newly + // created PHI node in the preheader. + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + unsigned PredR = PN->getOperand(i).getReg(); + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB == Latch) + continue; + + NewPN->addOperand(MachineOperand::CreateReg(PredR, false)); + NewPN->addOperand(MachineOperand::CreateMBB(PredB)); + } + + // Remove copied operands from the old PHI node and add the value + // coming from the preheader's PHI. + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB != Latch) { + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + } + PN->addOperand(MachineOperand::CreateReg(NewPR, false)); + PN->addOperand(MachineOperand::CreateMBB(NewPH)); + } + } else { - // Trip count is an immediate. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) - .addImm(MII->getOperand(1).getImm()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) - .addReg(Scratch); - } - // Then, set the SA0 with the loop start address. 
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
- .addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+ assert(Header->pred_size() == 2);
+
+ // The header has only two predecessors, but the non-latch predecessor
+ // is not a preheader (e.g. it has other successors, etc.)
+ // In such a case we don't need any extra PHI nodes in the new preheader,
+ // all we need is to adjust existing PHIs in the header to now refer to
+ // the new preheader.
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ MachineOperand &MO = PN->getOperand(i+1);
+ if (MO.getMBB() != Latch)
+ MO.setMBB(NewPH);
+ }
+ }
+ }
+
+ // "Reroute" the CFG edges to link in the new preheader.
+ // If any of the predecessors falls through to the header, insert a branch
+ // to the new preheader in that place.
+ SmallVector Tmp2;
+ SmallVector EmptyCond;
+
+ TB = FB = 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ Tmp2.clear();
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+ (void)NotAnalyzed; // suppress compiler warning
+ assert (!NotAnalyzed && "Should be analyzable!");
+ if (TB != Header && (Tmp2.empty() || FB != Header))
+ TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+ PB->ReplaceUsesOfBlockWith(Header, NewPH);
+ }
+ }
+
+ // It can happen that the latch block will fall through into the header.
+ // Insert an unconditional branch to the header.
+ TB = FB = 0;
+ bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+ (void)LatchNotAnalyzed; // suppress compiler warning
+ assert (!LatchNotAnalyzed && "Should be analyzable!");
+ if (!TB && !FB)
+ TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+ // Finally, the branch from the preheader to the header.
+ TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+ NewPH->addSuccessor(Header);
+
+ return NewPH;
 }
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index c067465..ee37dea 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -2461,6 +2461,9 @@ isValidOffset(const int Opcode, const int Offset) const {
 case Hexagon::LDriw_pred:
 return true;

+ case Hexagon::LOOP0_i:
+ return isUInt<10>(Offset);
+
 // INLINEASM is very special.
 case Hexagon::INLINEASM:
 return true;
--
cgit v1.1

From 9c213cc3c31da04a08a8e00a3395ac33d3d18536 Mon Sep 17 00:00:00 2001
From: Michael Ilseman
Date: Mon, 11 Feb 2013 21:41:44 +0000
Subject: Optimization: bitcast (<1 x ...> insertelement ..., X, ...) to ...
 ==> bitcast X to ...
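As a hedged illustration of the fold (the value names and the concrete types
here are invented; any single-element vector source fits the pattern):

  %v = insertelement <1 x i64> undef, i64 %x, i32 0
  %c = bitcast <1 x i64> %v to <2 x i32>

  ==>

  %c = bitcast i64 %x to <2 x i32>

The <1 x ...> temporary is bypassed and the inserted scalar is bitcast
directly to the destination type.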
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174905 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 98fd05a..fbc259b 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1738,11 +1738,22 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (VectorType *SrcVTy = dyn_cast(SrcTy)) { - if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = - Builder->CreateExtractElement(Src, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - return CastInst::Create(Instruction::BitCast, Elem, DestTy); + if (SrcVTy->getNumElements() == 1) { + // If our destination is not a vector, then make this a straight + // scalar-scalar cast. + if (!DestTy->isVectorTy()) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } + + // Otherwise, see if our source is an insert. If so, then use the scalar + // component directly. + if (InsertElementInst *IEI = + dyn_cast(CI.getOperand(0))) + return CastInst::Create(Instruction::BitCast, IEI->getOperand(1), + DestTy); } } -- cgit v1.1 From 1b235a26f530623d07b49ab861fcd4adb4e62b4f Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 11 Feb 2013 22:03:52 +0000 Subject: [mips] Fix indentation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174907 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCodeEmitter.cpp | 80 ++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index a24de60..ab3bfd9 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -62,59 +62,57 @@ class MipsCodeEmitter : public MachineFunctionPass { static char ID; - public: - MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : - MachineFunctionPass(ID), JTI(0), - II((const MipsInstrInfo *) tm.getInstrInfo()), - TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) { - } +public: + MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(0), + II((const MipsInstrInfo *) tm.getInstrInfo()), TD(tm.getDataLayout()), + TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { - return "Mips Machine Code Emitter"; - } + virtual const char *getPassName() const { + return "Mips Machine Code Emitter"; + } - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. 
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - void emitInstruction(const MachineInstr &MI); + void emitInstruction(const MachineInstr &MI); - private: +private: - void emitWord(unsigned Word); + void emitWord(unsigned Word); - /// Routines that handle operands which add machine relocations which are - /// fixed up by the relocation stage. - void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub) const; - void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; - void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; - void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; - void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const; + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; - unsigned getRelocation(const MachineInstr &MI, - const MachineOperand &MO) const; + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; - unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; - unsigned getBranchTargetOpValue(const MachineInstr &MI, - unsigned OpNo) const; - unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; - void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc, - int Offset) const; - }; + void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc, + int Offset) const; +}; } char MipsCodeEmitter::ID = 0; -- cgit v1.1 From 04ef4be048934f8acf15c4ed6e3ebdd410c252bb Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 11 Feb 2013 22:32:29 +0000 Subject: Use a std::map so that we record the group ID. 
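For context, the group IDs recorded here are the keys behind the #N attribute
group references in the textual IR; a hedged sketch (the attribute contents
are invented for illustration):

  define void @f() #0 { ret void }
  define void @g() #1 { ret void }

  attributes #0 = { nounwind }
  attributes #1 = { nounwind readonly }

Keying the reader's table by the ID from the record, instead of by insertion
order, keeps those references resolvable even if groups arrive out of order.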
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174910 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 13 +++++++------ lib/Bitcode/Reader/BitcodeReader.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 30ba85e..476c68a 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -531,8 +531,7 @@ bool BitcodeReader::ParseAttributeGroupBlock() { if (Record.size() < 3) return Error("Invalid ENTRY record"); - // FIXME: Record[0] is the 'group ID'. What should we do with it here? - + uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. AttrBuilder B; @@ -545,27 +544,29 @@ bool BitcodeReader::ParseAttributeGroupBlock() { else B.addStackAlignmentAttr(Record[++i]); } else { // String attribute + assert((Record[i] == 3 || Record[i] == 4) && + "Invalid attribute group entry"); bool HasValue = (Record[i++] == 4); SmallString<64> KindStr; SmallString<64> ValStr; while (Record[i] != 0 && i != e) KindStr += Record[i++]; - assert(Record[i] == 0 && "Kind string not terminated with 0"); + assert(Record[i] == 0 && "Kind string not null terminated"); if (HasValue) { // Has a value associated with it. - ++i; // Skip the '0' that terminates the kind string. + ++i; // Skip the '0' that terminates the "kind" string. while (Record[i] != 0 && i != e) ValStr += Record[i++]; - assert(Record[i] == 0 && "Value string not terminated with 0"); + assert(Record[i] == 0 && "Value string not null terminated"); } B.addAttribute(KindStr.str(), ValStr.str()); } } - MAttributeGroups.push_back(AttributeSet::get(Context, Idx, B)); + MAttributeGroups[GrpID] = AttributeSet::get(Context, Idx, B); break; } } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 8d36e67..28674eb 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -149,7 +149,7 @@ class BitcodeReader : public GVMaterializer { std::vector MAttributes; /// \brief The set of attribute groups. - std::vector MAttributeGroups; + std::map MAttributeGroups; /// FunctionBBs - While parsing a function body, this is a list of the basic /// blocks for the function. -- cgit v1.1 From e9229a6a9614cbde1bff2bd6ffae3b7336db5702 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 11 Feb 2013 22:33:26 +0000 Subject: Rename AttributeSets to AttributeGroups so that it's more meaningful. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174911 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/BitcodeWriter.cpp | 58 +++++++++++++++++----------------- lib/Bitcode/Writer/ValueEnumerator.cpp | 6 ++-- lib/Bitcode/Writer/ValueEnumerator.h | 18 +++++------ 3 files changed, 41 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 65c3f73..84f67ad 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -161,44 +161,20 @@ static void WriteStringRecord(unsigned Code, StringRef Str, Stream.EmitRecord(Code, Vals, AbbrevToUse); } -/// \brief This returns an integer containing an encoding of all the LLVM -/// attributes found in the given attribute bitset. Any change to this encoding -/// is a breaking change to bitcode compatibility. -/// N.B. This should be used only by the bitcode writer! 
-static uint64_t encodeLLVMAttributesForBitcode(AttributeSet Attrs, - unsigned Index) { - // FIXME: Remove in 4.0! - - // FIXME: It doesn't make sense to store the alignment information as an - // expanded out value, we should store it as a log2 value. However, we can't - // just change that here without breaking bitcode compatibility. If this ever - // becomes a problem in practice, we should introduce new tag numbers in the - // bitcode file and have those tags use a more efficiently encoded alignment - // field. - - // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit - // log2 encoded value. Shift the bits above the alignment up by 11 bits. - uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; - if (Attrs.hasAttribute(Index, Attribute::Alignment)) - EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; - EncodedAttrs |= (Attrs.Raw(Index) & (0xfffffULL << 21)) << 11; - return EncodedAttrs; -} - static void WriteAttributeGroupTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { - const std::vector &Attrs = VE.getAttributeSets(); - if (Attrs.empty()) return; + const std::vector &AttrGrps = VE.getAttributeGroups(); + if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector Record; - for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - AttributeSet AS = Attrs[i]; + for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) { + AttributeSet AS = AttrGrps[i]; for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { AttributeSet A = AS.getSlotAttributes(i); - Record.push_back(VE.getAttributeSetID(A)); + Record.push_back(VE.getAttributeGroupID(A)); Record.push_back(AS.getSlotIndex(i)); for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0); @@ -233,6 +209,30 @@ static void WriteAttributeGroupTable(const ValueEnumerator &VE, Stream.ExitBlock(); } +/// \brief This returns an integer containing an encoding of all the LLVM +/// attributes found in the given attribute bitset. Any change to this encoding +/// is a breaking change to bitcode compatibility. +/// N.B. This should be used only by the bitcode writer! +static uint64_t encodeLLVMAttributesForBitcode(AttributeSet Attrs, + unsigned Index) { + // FIXME: Remove in 4.0! + + // FIXME: It doesn't make sense to store the alignment information as an + // expanded out value, we should store it as a log2 value. However, we can't + // just change that here without breaking bitcode compatibility. If this ever + // becomes a problem in practice, we should introduce new tag numbers in the + // bitcode file and have those tags use a more efficiently encoded alignment + // field. + + // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit + // log2 encoded value. Shift the bits above the alignment up by 11 bits. + uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; + if (Attrs.hasAttribute(Index, Attribute::Alignment)) + EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; + EncodedAttrs |= (Attrs.Raw(Index) & (0xfffffULL << 21)) << 11; + return EncodedAttrs; +} + static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &Attrs = VE.getAttributes(); diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 9f7e17b..5822586 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -431,10 +431,10 @@ void ValueEnumerator::EnumerateAttributes(const AttributeSet &PAL) { // Do lookups for all attribute groups. 
for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { AttributeSet AS = PAL.getSlotAttributes(i); - unsigned &Entry = AttributeSetMap[AS]; + unsigned &Entry = AttributeGroupMap[AS]; if (Entry == 0) { - AttributeSets.push_back(AS); - Entry = AttributeSets.size(); + AttributeGroups.push_back(AS); + Entry = AttributeGroups.size(); } } } diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 6e91d68..33db523 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -52,9 +52,9 @@ private: SmallVector FunctionLocalMDs; ValueMapType MDValueMap; - typedef DenseMap AttributeSetMapType; - AttributeSetMapType AttributeSetMap; - std::vector AttributeSets; + typedef DenseMap AttributeGroupMapType; + AttributeGroupMapType AttributeGroupMap; + std::vector AttributeGroups; typedef DenseMap AttributeMapType; AttributeMapType AttributeMap; @@ -102,17 +102,17 @@ public: unsigned getInstructionID(const Instruction *I) const; void setInstructionID(const Instruction *I); - unsigned getAttributeID(const AttributeSet &PAL) const { + unsigned getAttributeID(AttributeSet PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer()); assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } - unsigned getAttributeSetID(const AttributeSet &PAL) const { + unsigned getAttributeGroupID(AttributeSet PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeSetMapType::const_iterator I = AttributeSetMap.find(PAL); - assert(I != AttributeSetMap.end() && "Attribute not in ValueEnumerator!"); + AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL); + assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } @@ -135,8 +135,8 @@ public: const std::vector &getAttributes() const { return Attribute; } - const std::vector &getAttributeSets() const { - return AttributeSets; + const std::vector &getAttributeGroups() const { + return AttributeGroups; } /// getGlobalBasicBlockID - This returns the function-specific ID for the -- cgit v1.1 From b4b4fa80bab1387e50c5f1b08e1141f853d35204 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 11 Feb 2013 22:35:40 +0000 Subject: [mips] Expand pseudo instructions before they are emitted in MipsCodeEmitter.cpp. JALR and NOP are expanded by function emitPseudoExpansionLowering, which is not called when the old JIT is used. 
This fixes the following tests which have been failing on llvm-mips-linux builder:

LLVM :: ExecutionEngine/2003-01-04-LoopTest.ll
LLVM :: ExecutionEngine/2003-05-06-LivenessClobber.ll
LLVM :: ExecutionEngine/2003-06-04-bzip2-bug.ll
LLVM :: ExecutionEngine/2005-12-02-TailCallBug.ll
LLVM :: ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
LLVM :: ExecutionEngine/hello2.ll
LLVM :: ExecutionEngine/stubs.ll
LLVM :: ExecutionEngine/test-branch.ll
LLVM :: ExecutionEngine/test-call.ll
LLVM :: ExecutionEngine/test-common-symbols.ll
LLVM :: ExecutionEngine/test-loadstore.ll
LLVM :: ExecutionEngine/test-loop.ll

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174912 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsCodeEmitter.cpp | 49 ++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index ab3bfd9..df877b6 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
@@ -80,7 +81,8 @@ public:
 /// machine instructions.
 uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;

- void emitInstruction(const MachineInstr &MI);
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);

 private:

@@ -112,6 +114,10 @@ private:
 void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
 int Offset) const;
+
+ /// \brief Expand pseudo instruction. Return true if MI was expanded.
+ bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const;
 };
 }

@@ -140,8 +146,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
 MBB != E; ++MBB){
 MCE.StartMachineBasicBlock(MBB);
 for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E; ++I)
- emitInstruction(*I);
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
 }
 } while (MCE.finishFunction(MF));

@@ -266,19 +272,21 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
 Reloc, BB));
 }

-void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
-
- MCE.processDebugLoc(MI.getDebugLoc(), true);
+void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);

- // Skip pseudo instructions.
- if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ // Expand pseudo instruction. Skip if MI was not expanded.
+ if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) && + !expandPseudos(MI, MBB)) return; - emitWord(getBinaryCodeForInstr(MI)); + MCE.processDebugLoc(MI->getDebugLoc(), true); + + emitWord(getBinaryCodeForInstr(*MI)); ++NumEmitted; // Keep track of the # of mi's emitted - MCE.processDebugLoc(MI.getDebugLoc(), false); + MCE.processDebugLoc(MI->getDebugLoc(), false); } void MipsCodeEmitter::emitWord(unsigned Word) { @@ -290,6 +298,25 @@ void MipsCodeEmitter::emitWord(unsigned Word) { MCE.emitWordBE(Word); } +bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const { + switch (MI->getOpcode()) { + case Mips::NOP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO) + .addReg(Mips::ZERO).addImm(0); + break; + case Mips::JALRPseudo: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA) + .addReg(MI->getOperand(0).getReg()); + break; + default: + return false; + } + + (MI--)->eraseFromBundle(); + return true; +} + /// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips /// code to the specified MCE object. FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, -- cgit v1.1 From 8f3359a4b396d3f1a7b2726e02f199be74c62e4c Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 23:02:09 +0000 Subject: BBVectorize: Omit unnecessary entries in PairableInstUsers This map is queried only for instructions in pairs of pairable instructions; so make sure that only pairs of pairable instructions are added to the map. This gives a 3.5% speedup on the csa.ll test case from PR15222. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174914 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index bf8fca0..b14d91d 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1390,8 +1390,10 @@ namespace { (void) trackUsesOfI(Users, WriteSet, I, J); for (DenseSet::iterator U = Users.begin(), E = Users.end(); - U != E; ++U) + U != E; ++U) { + if (IsInPair.find(*U) == IsInPair.end()) continue; PairableInstUsers.insert(ValuePair(I, *U)); + } } } -- cgit v1.1 From c951003faf4d475d221f5e839971673d2350b983 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 11 Feb 2013 23:02:17 +0000 Subject: BBVectorize: Don't over-search when building the dependency map When building the pairable-instruction dependency map, don't search past the last pairable instruction. For large blocks that have been divided into multiple instruction groups, searching past the last instruction in each group is very wasteful. This gives a 32% speedup on the csa.ll test case from PR15222 (when using 50 instructions in each group). No functionality change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174915 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index b14d91d..c11d9f6 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1380,20 +1380,28 @@ namespace { // Iterate through the basic block, recording all users of each // pairable instruction. - BasicBlock::iterator E = BB.end(); + BasicBlock::iterator E = BB.end(), EL = + BasicBlock::iterator(cast(PairableInsts.back())); for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { if (IsInPair.find(I) == IsInPair.end()) continue; DenseSet Users; AliasSetTracker WriteSet(*AA); - for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) + for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) { (void) trackUsesOfI(Users, WriteSet, I, J); + if (J == EL) + break; + } + for (DenseSet::iterator U = Users.begin(), E = Users.end(); U != E; ++U) { if (IsInPair.find(*U) == IsInPair.end()) continue; PairableInstUsers.insert(ValuePair(I, *U)); } + + if (I == EL) + break; } } -- cgit v1.1 From 6c3daabc3ee51a8fcb804e0f110f01e59e0e6d61 Mon Sep 17 00:00:00 2001 From: Kay Tiong Khoo Date: Tue, 12 Feb 2013 00:19:12 +0000 Subject: Added 0x0D to 2-byte opcode extension table for prefetch* variants Fixed decode of existing 3dNow prefetchw instruction Intel is scheduled to add a compatible prefetchw (same encoding) to future CPUs git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Instr3DNow.td | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index 54b91c3..bb362f5 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -87,12 +87,10 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), - "prefetch $addr", []>; + "prefetch\t$addr", []>; -// FIXME: Diassembler gets a bogus decode conflict. -let isAsmParserOnly = 1 in def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), - "prefetchw $addr", []>; + "prefetchw\t$addr", []>; // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; -- cgit v1.1 From 2ce067a9fb5b2d046c92519428cafa71fae81ed4 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 12 Feb 2013 00:40:41 +0000 Subject: DIBuilder: make the return type of createBasicType more specific git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index cc397cd..df1a81f 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -162,9 +162,9 @@ DIType DIBuilder::createNullPtrType(StringRef Name) { /// createBasicType - Create debugging information entry for a basic /// type, e.g 'char'. 
-DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits,
- unsigned Encoding) {
+DIBasicType
+DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding) {
 assert(!Name.empty() && "Unable to create type without name");
 // Basic types are encoded in DIBasicType format. Line number, filename,
 // offset and flags are always empty here.
@@ -180,7 +180,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
 ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
 ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
 };
- return DIType(MDNode::get(VMContext, Elts));
+ return DIBasicType(MDNode::get(VMContext, Elts));
 }

 /// createQualifiedType - Create debugging information entry for a qualified
--
cgit v1.1

From d556fd129026f6e3fa6ea9c2c70ba489bff18954 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 12 Feb 2013 01:00:01 +0000
Subject: [ms-inline asm] Add support for lexing hexadecimal integers with a
 [hH] suffix.

Part of rdar://12470373

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174926 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCParser/AsmLexer.cpp | 61 ++++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 14 deletions(-)

(limited to 'lib')

diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index d0492fd..a7de64f 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -162,21 +162,43 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
 CurPtr += 3;
 }

+// Look ahead to search for the first non-hex digit; if it's [hH], then we
+// treat the integer as a hexadecimal, possibly with leading zeroes.
+static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
+ const char *FirstHex = 0;
+ const char *LookAhead = CurPtr;
+ while (1) {
+ if (isdigit(*LookAhead)) {
+ ++LookAhead;
+ } else if (isxdigit(*LookAhead)) {
+ if (!FirstHex)
+ FirstHex = LookAhead;
+ ++LookAhead;
+ } else {
+ break;
+ }
+ }
+ bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
+ CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+ if (isHex)
+ return 16;
+ return DefaultRadix;
+}
+
 /// LexDigit: First character is [0-9].
 /// Local Label: [0-9][:]
 /// Forward/Backward Label: [0-9][fb]
 /// Binary integer: 0b[01]+
 /// Octal integer: 0[0-7]+
-/// Hex integer: 0x[0-9a-fA-F]+
+/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
 /// Decimal integer: [1-9][0-9]*
 AsmToken AsmLexer::LexDigit() {
 // Decimal integer: [1-9][0-9]*
 if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
- while (isdigit(*CurPtr))
- ++CurPtr;
-
+ unsigned Radix = doLookAhead(CurPtr, 10);
+ bool isHex = Radix == 16;
 // Check for floating point literals.
- if (*CurPtr == '.' || *CurPtr == 'e') {
+ if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
 ++CurPtr;
 return LexFloatLiteral();
 }
@@ -184,15 +206,20 @@ AsmToken AsmLexer::LexDigit() {
 StringRef Result(TokStart, CurPtr - TokStart);
 long long Value;
- if (Result.getAsInteger(10, Value)) {
+ if (Result.getAsInteger(Radix, Value)) {
 // Allow positive values that are too large to fit into a signed 64-bit
 // integer, but that do fit in an unsigned one, we just convert them over.
 unsigned long long UValue;
- if (Result.getAsInteger(10, UValue))
- return ReturnError(TokStart, "invalid decimal number");
+ if (Result.getAsInteger(Radix, UValue))
+ return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid decimal number" : + "invalid hexdecimal number"); Value = (long long)UValue; } + // Consume the [bB][hH]. + if (Radix == 2 || Radix == 16) + ++CurPtr; + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); @@ -243,6 +270,10 @@ AsmToken AsmLexer::LexDigit() { if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) return ReturnError(TokStart, "invalid hexadecimal number"); + // Consume the optional [hH]. + if (*CurPtr == 'h' || *CurPtr == 'H') + ++CurPtr; + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); @@ -251,15 +282,17 @@ AsmToken AsmLexer::LexDigit() { (int64_t)Result); } - // Must be an octal number, it starts with 0. - while (*CurPtr >= '0' && *CurPtr <= '9') - ++CurPtr; - - StringRef Result(TokStart, CurPtr - TokStart); + // Either octal or hexidecimal. long long Value; - if (Result.getAsInteger(8, Value)) + unsigned Radix = doLookAhead(CurPtr, 8); + StringRef Result(TokStart, CurPtr - TokStart); + if (Result.getAsInteger(Radix, Value)) return ReturnError(TokStart, "invalid octal number"); + // Consume the [hH]. + if (Radix == 16) + ++CurPtr; + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); -- cgit v1.1 From 53e5bb70db34b736eed01c1580af1afd7314a2d8 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 12 Feb 2013 01:12:24 +0000 Subject: Update error message due to previous commit, r174926. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174927 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index a7de64f..530e94e 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -285,9 +285,11 @@ AsmToken AsmLexer::LexDigit() { // Either octal or hexidecimal. long long Value; unsigned Radix = doLookAhead(CurPtr, 8); + bool isHex = Radix == 16; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, "invalid octal number"); + return ReturnError(TokStart, !isHex ? "invalid octal number" : + "invalid hexdecimal number"); // Consume the [hH]. if (Radix == 16) -- cgit v1.1 From d9316dacf5bb8c02631f782c7f2fc24fb8d788f3 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 12 Feb 2013 01:58:32 +0000 Subject: ARM NEON: Handle v16i8 and v8i16 reverse shuffles Lower reverse shuffles to a vrev64 and a vext instruction instead of the default legalization of storing and loading to the stack. This is important because we generate reverse shuffles in the loop vectorizer when we reverse store to an array. uint8_t Arr[N]; for (i = 0; i < N; ++i) Arr[N - i - 1] = ... 
radar://13171760

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174929 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4c9d2da..9d7a379 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){
 return true;
 }

+/// \return true if this is a reverse operation on a vector.
+static bool isReverseMask(ArrayRef M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
 // If N is an integer constant that can be moved into a register in one
 // instruction, return an SDValue of such a constant (will become a MOV
 // instruction). Otherwise return null.
@@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M,
 isVZIPMask(M, VT, WhichResult) ||
 isVTRN_v_undef_Mask(M, VT, WhichResult) ||
 isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult));
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
 }

 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
 &VTBLMask[0], 8));
 }

+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect a v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
+
 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 SDValue V1 = Op.getOperand(0);
 SDValue V2 = Op.getOperand(1);
@@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
 }

+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
 if (VT == MVT::v8i8) {
 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
 if (NewOp.getNode())
--
cgit v1.1

From 4fc6484ee2439a9506d525ca757171e0ecc07744 Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Tue, 12 Feb 2013 02:40:37 +0000
Subject: Cost model: Add check for reverse shuffles to CostModel analysis

Check for reverse shuffles in the CostModel analysis pass and query
TargetTransformInfo accordingly. This allows us to write test cases for
reverse shuffles.
radar://13171406

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174932 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/CostModel.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'lib')

diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 8435e39..44684a9 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -80,6 +80,13 @@ CostModelAnalysis::runOnFunction(Function &F) {
 return false;
 }

+static bool isReverseVectorMask(SmallVector &Mask) {
+ for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
+ if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ return false;
+ return true;
+}
+
 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
 if (!TTI)
 return -1;
@@ -171,6 +178,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
 return TTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx);
 }

+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst *Shuffle = cast(I);
+ Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
+ unsigned NumVecElems = VecTypOp0->getVectorNumElements();
+ SmallVector Mask = Shuffle->getShuffleMask();
+
+ if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
+ 0);
+ return -1;
+ }
 default:
 // We don't have any information on this instruction.
 return -1;
--
cgit v1.1

From 6851623c54b35673f6e9a0ed0fd12378c93f48c4 Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Tue, 12 Feb 2013 02:40:39 +0000
Subject: ARM cost model: Add vector reverse shuffle costs

A reverse shuffle is lowered to a vrev and possibly a vext instruction
(quad word).

radar://13171406

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174933 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMTargetTransformInfo.cpp | 33 +++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'lib')

diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f6fa319..01c04b4 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -114,6 +114,9 @@ public:
 return 1;
 }

+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+
 unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
 Type *Src) const;
@@ -335,3 +338,33 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
 // addressing mode.
 return 1;
 }
+
+unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only handle costs of reverse shuffles for now.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ static const CostTblEntry NEONShuffleTbl[] = {
+ // Reverse shuffles cost one instruction if we are shuffling within a double
+ // word (vrev) or two if we shuffle a quad word (vrev, vext).
+ { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
+
+ { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
+ };
+
+ std::pair LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
+ ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+}
--
cgit v1.1

From b0fde6dc029a33945bfeea7082a03940f664943c Mon Sep 17 00:00:00 2001
From: Pete Cooper
Date: Tue, 12 Feb 2013 03:14:50 +0000
Subject: Check type for legality before forming a select from loads.

Sorry for the lack of a test case. I tried writing one for i386, as I know
selects are illegal on this target, but they are actually considered legal
by isel and expanded later.

I can't see any targets to trigger this, but checking for the legality of a
node before forming it is general goodness.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174934 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 472919c..5b83149 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9400,7 +9400,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
 // src value info, don't do the transformation if the memory
 // locations are not in the default address space.
 LLD->getPointerInfo().getAddrSpace() != 0 ||
- RLD->getPointerInfo().getAddrSpace() != 0)
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
 return false;

 // Check that the select condition doesn't reach either load. If so,
--
cgit v1.1

From dbf10c4349bb746e6120ff1195c1ce7e21bebf93 Mon Sep 17 00:00:00 2001
From: Cameron Zwarich
Date: Tue, 12 Feb 2013 03:49:17 +0000
Subject: Fix the updating of LiveIntervals after splitting a critical edge.

PHI operand live ranges should always be extended, and the only successor
that should be considered for extension of other ranges is the target of
the split edge.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174935 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineBasicBlock.cpp | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 7351302..71d59f0 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -796,8 +796,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
 MachineOperand &MO = I->getOperand(ni);
 unsigned Reg = MO.getReg();
 PHISrcRegs.insert(Reg);
- if (MO.isUndef() || !isLastMBB)
- break;
+ if (MO.isUndef())
+ continue;

 LiveInterval &LI = LIS->getInterval(Reg);
 VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
@@ -817,16 +817,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
 if (!LI.liveAt(PrevIndex))
 continue;

- bool isLiveOut = false;
- for (MachineBasicBlock::succ_iterator SI = succ_begin(),
- SE = succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI == NMBB ?
Succ : *SI; - if (LI.liveAt(LIS->getMBBStartIdx(SuccMBB))) { - isLiveOut = true; - break; - } - } - + bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ)); if (isLiveOut && isLastMBB) { VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "LiveInterval should have VNInfo where it is live."); -- cgit v1.1 From dd58fa4869f9bff909720aaa428487a20fab1391 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Tue, 12 Feb 2013 03:49:20 +0000 Subject: Add blocks to the LiveIntervalAnalysis RegMaskBlocks array when splitting a critical edge. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBasicBlock.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 71d59f0..df1c7c2 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -664,8 +664,12 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { " BB#" << getNumber() << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); + + LiveIntervals *LIS = P->getAnalysisIfAvailable(); SlotIndexes *Indexes = P->getAnalysisIfAvailable(); - if (Indexes) + if (LIS) + LIS->insertMBBInMaps(NMBB); + else if (Indexes) Indexes->insertMBBInMaps(NMBB); // On some targets like Mips, branches may kill virtual registers. Make sure @@ -771,7 +775,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LV->addNewBlock(NMBB, this, Succ); } - if (LiveIntervals *LIS = P->getAnalysisIfAvailable()) { + if (LIS) { // After splitting the edge and updating SlotIndexes, live intervals may be // in one of two situations, depending on whether this block was the last in // the function. If the original block was the last in the function, all live -- cgit v1.1 From 5758a711f4e593d1daff3bae0fa9d694e5980719 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Tue, 12 Feb 2013 03:49:25 +0000 Subject: Add a hidden option to PHIElimination to split all critical edges. This is particularly useful for catching issues with architectures that have exotic terminators like MIPS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174938 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index aa84a60..6d8f207 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -40,6 +40,11 @@ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), cl::Hidden, cl::desc("Disable critical edge splitting " "during PHI elimination")); +static cl::opt +SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), + cl::Hidden, cl::desc("Split all critical edges during " + "PHI elimination")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -550,10 +555,10 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. - if (PreMBB == &MBB) + if (PreMBB == &MBB && !SplitAllCriticalEdges) continue; const MachineLoop *PreLoop = MLI ? 
MLI->getLoopFor(PreMBB) : 0; - if (IsLoopHeader && PreLoop == CurLoop) + if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) continue; // LV doesn't consider a phi use live-out, so isLiveOut only returns true @@ -562,7 +567,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!isLiveOutPastPHIs(Reg, PreMBB)) + if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) continue; DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" @@ -577,7 +582,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !isLiveIn(Reg, &MBB); + bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { -- cgit v1.1 From 8d49134eeaa36953410c2fba65f7237fb3f079e7 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Tue, 12 Feb 2013 05:48:56 +0000 Subject: Fix indentation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174944 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 6d8f207..f046ac9 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -162,8 +162,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { // Clean up the lowered PHI instructions. for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); I != E; ++I) { - if (LIS) - LIS->RemoveMachineInstrFromMaps(I->first); + if (LIS) + LIS->RemoveMachineInstrFromMaps(I->first); MF.DeleteMachineInstr(I->first); } -- cgit v1.1 From 4930e7266b7643410cfbbed5ef6e4d3b19178918 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Tue, 12 Feb 2013 05:48:58 +0000 Subject: Don't consider definitions by other PHIs live-in when trimming a PHI source's live range after inserting a copy at the end of a block. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174945 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index f046ac9..abad5c1 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -467,7 +467,11 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, bool isLiveOut = false; for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), SE = opBlock.succ_end(); SI != SE; ++SI) { - if (SrcLI.liveAt(LIS->getMBBStartIdx(*SI))) { + SlotIndex startIdx = LIS->getMBBStartIdx(*SI); + VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); + + // Definitions by other PHIs are not truly live-in for our purposes. + if (VNI && VNI->def != startIdx) { isLiveOut = true; break; } -- cgit v1.1 From 74fe825ca597f56985ab4387baca35948647ec4b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 12 Feb 2013 07:56:49 +0000 Subject: Support string attributes in the AttrBuilder. 
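String attributes come in a bare form and a key/value form; a hedged IR
sketch (both strings are invented for illustration, not attributes the
builder defines):

  attributes #0 = { nounwind "no-nans" "cpu"="cortex-a8" }

With this change removeAttributes() and hasAttributes() handle these
target-dependent kinds alongside the enum and alignment attributes.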
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174948 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d338d65..8249be4 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -938,14 +938,22 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { assert(Idx != ~0U && "Couldn't find index in AttributeSet!"); for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) { - // FIXME: Support string attributes. - Attribute::AttrKind Kind = I->getKindAsEnum(); - Attrs.erase(Kind); + Attribute Attr = *I; + if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { + Attribute::AttrKind Kind = I->getKindAsEnum(); + Attrs.erase(Kind); - if (Kind == Attribute::Alignment) - Alignment = 0; - else if (Kind == Attribute::StackAlignment) - StackAlignment = 0; + if (Kind == Attribute::Alignment) + Alignment = 0; + else if (Kind == Attribute::StackAlignment) + StackAlignment = 0; + } else { + assert(Attr.isStringAttribute() && "Invalid attribute type!"); + std::map::iterator + Iter = TargetDepAttrs.find(Attr.getKindAsString()); + if (Iter != TargetDepAttrs.end()) + TargetDepAttrs.erase(Iter); + } } return *this; @@ -1021,10 +1029,16 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { assert(Idx != ~0U && "Couldn't find the index!"); for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); - I != E; ++I) - // FIXME: Support string attributes. - if (Attrs.count(I->getKindAsEnum())) - return true; + I != E; ++I) { + Attribute Attr = *I; + if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { + if (Attrs.count(I->getKindAsEnum())) + return true; + } else { + assert(Attr.isStringAttribute() && "Invalid attribute kind!"); + return TargetDepAttrs.find(Attr.getKindAsString())!=TargetDepAttrs.end(); + } + } return false; } -- cgit v1.1 From 105ea3d49d4a458af8779ae7f144f00d19c4168f Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 12 Feb 2013 08:01:22 +0000 Subject: Use the AttributeSet as the 'key' to the map instead of the 'raw' pointer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Writer/ValueEnumerator.cpp | 5 +++-- lib/Bitcode/Writer/ValueEnumerator.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 5822586..4f19dd0 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -418,10 +418,11 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { EnumerateMetadata(V); } -void ValueEnumerator::EnumerateAttributes(const AttributeSet &PAL) { +void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) { if (PAL.isEmpty()) return; // null is always 0. + // Do a lookup. - unsigned &Entry = AttributeMap[PAL.getRawPointer()]; + unsigned &Entry = AttributeMap[PAL]; if (Entry == 0) { // Never saw this before, add it. 
Attribute.push_back(PAL); diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 33db523..0af6164 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -56,7 +56,7 @@ private: AttributeGroupMapType AttributeGroupMap; std::vector AttributeGroups; - typedef DenseMap AttributeMapType; + typedef DenseMap AttributeMapType; AttributeMapType AttributeMap; std::vector Attribute; @@ -104,7 +104,7 @@ public: unsigned getAttributeID(AttributeSet PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer()); + AttributeMapType::const_iterator I = AttributeMap.find(PAL); assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } @@ -160,7 +160,7 @@ private: void EnumerateValue(const Value *V); void EnumerateType(Type *T); void EnumerateOperandType(const Value *V); - void EnumerateAttributes(const AttributeSet &PAL); + void EnumerateAttributes(AttributeSet PAL); void EnumerateValueSymbolTable(const ValueSymbolTable &ST); void EnumerateNamedMetadata(const Module *M); -- cgit v1.1 From 48fbcfe6b959df628a6455e00ac8d94fa6ade87a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 12 Feb 2013 08:13:50 +0000 Subject: Have the bitcode writer and reader handle the new attribute references. The bitcode writer emits a reference to the attribute group that the object at the given index refers to. The bitcode reader is modified to read this in and map it back to the attribute group. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174952 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 8 ++++++++ lib/Bitcode/Writer/BitcodeWriter.cpp | 36 +++--------------------------------- 2 files changed, 11 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 476c68a..f348843 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -494,6 +494,14 @@ bool BitcodeReader::ParseAttributeBlock() { Attrs.clear(); break; } + case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [attrgrp0, attrgrp1, ...] + for (unsigned i = 0, e = Record.size(); i != e; ++i) + Attrs.push_back(MAttributeGroups[Record[i]]); + + MAttributes.push_back(AttributeSet::get(Context, Attrs)); + Attrs.clear(); + break; + } } } } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 84f67ad..1b73f23 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -209,30 +209,6 @@ static void WriteAttributeGroupTable(const ValueEnumerator &VE, Stream.ExitBlock(); } -/// \brief This returns an integer containing an encoding of all the LLVM -/// attributes found in the given attribute bitset. Any change to this encoding -/// is a breaking change to bitcode compatibility. -/// N.B. This should be used only by the bitcode writer! -static uint64_t encodeLLVMAttributesForBitcode(AttributeSet Attrs, - unsigned Index) { - // FIXME: Remove in 4.0! - - // FIXME: It doesn't make sense to store the alignment information as an - // expanded out value, we should store it as a log2 value. However, we can't - // just change that here without breaking bitcode compatibility. If this ever - // becomes a problem in practice, we should introduce new tag numbers in the - // bitcode file and have those tags use a more efficiently encoded alignment - // field. 
- - // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit - // log2 encoded value. Shift the bits above the alignment up by 11 bits. - uint64_t EncodedAttrs = Attrs.Raw(Index) & 0xffff; - if (Attrs.hasAttribute(Index, Attribute::Alignment)) - EncodedAttrs |= Attrs.getParamAlignment(Index) << 16; - EncodedAttrs |= (Attrs.Raw(Index) & (0xfffffULL << 21)) << 11; - return EncodedAttrs; -} - static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &Attrs = VE.getAttributes(); @@ -240,19 +216,13 @@ static void WriteAttributeTable(const ValueEnumerator &VE, Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); - // FIXME: Remove this! It no longer works with the current attributes classes. - SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { const AttributeSet &A = Attrs[i]; - for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { - unsigned Index = A.getSlotIndex(i); - Record.push_back(Index); - Record.push_back(encodeLLVMAttributesForBitcode(A.getSlotAttributes(i), - Index)); - } + for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) + Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i))); - Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY_OLD, Record); + Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); } -- cgit v1.1 From f54676234a9826fc987512301d4dea36405c2c42 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 12 Feb 2013 10:13:06 +0000 Subject: Merge the collected attributes into the call instruction's attributes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174955 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index e4f8d1f..c4b2c0f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -102,7 +102,7 @@ bool LLParser::ValidateEndOfModule() { AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, AS.getFnAttributes()); - + FnAttrs.merge(B); AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, AttributeSet::get(Context, AttributeSet::FunctionIndex, @@ -113,7 +113,7 @@ bool LLParser::ValidateEndOfModule() { AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, AS.getFnAttributes()); - + FnAttrs.merge(B); AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, AttributeSet::get(Context, AttributeSet::FunctionIndex, -- cgit v1.1 From 0bc55d517e8e64f0f441736fba2447781c405ef4 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 12 Feb 2013 11:11:02 +0000 Subject: [asan] change the default mapping offset on x86_64 to 0x7fff8000. This gives roughly 5% speedup. Since this is an ABI change, bump the asan ABI version by renaming __asan_init to __asan_init_v1. 
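For context, ASan computes a shadow address as (Addr >> Scale) + Offset; the win here is that 0x7fff8000 fits in a 32-bit immediate, so the add is cheap on x86_64. A minimal sketch of the mapping, taking the offset from this message and the usual scale of 3 (one shadow byte per 8 application bytes) as assumed values:

#include <cstdint>
#include <cstdio>

static const uint64_t kShadowScale = 3;          // 8 app bytes per shadow byte
static const uint64_t kShortOffset = 0x7fff8000; // fits a 32-bit immediate

// On x86_64 this lowers to a shift plus an add-with-immediate, which is
// where the roughly 5% speedup comes from.
static uint64_t memToShadow(uint64_t Addr) {
  return (Addr >> kShadowScale) + kShortOffset;
}

int main() {
  std::printf("shadow(0x7f0000001000) = 0x%llx\n",
              (unsigned long long)memToShadow(0x7f0000001000ULL));
  return 0;
}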
llvm part, compiler-rt part will follow git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174957 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index c5f77ec..02a8680 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -68,7 +68,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init"; +static const char *kAsanInitName = "__asan_init_v1"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -136,7 +136,7 @@ static cl::opt ClMappingOffsetLog("asan-mapping-offset-log", cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1)); static cl::opt ClShort64BitOffset("asan-short-64bit-mapping-offset", cl::desc("Use short immediate constant as the mapping offset for 64bit"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); // Optimization flags. Not user visible, used mostly for testing // and benchmarking the tool. @@ -203,6 +203,7 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize, llvm::Triple TargetTriple(M.getTargetTriple()); bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android; bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64; + bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; ShadowMapping Mapping; @@ -214,7 +215,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize, Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 : (LongSize == 32 ? kDefaultShadowOffset32 : IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64); - if (!ZeroBaseShadow && ClShort64BitOffset && LongSize == 64) { + if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64) { + assert(LongSize == 64); Mapping.Offset = kDefaultShort64bitShadowOffset; } if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) { // Zero offset log is the special case. -- cgit v1.1 From 6158ad1bc73652eaba4c14c4c4e1f14b3fdf1be2 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Tue, 12 Feb 2013 12:11:23 +0000 Subject: R600: Fix regression with shadow array sampler on pre-SI GPUs. 'R600/SI: Use proper instructions for array/shadow samplers.' removed two cases from TEX_SHADOW. Vincent Lejeune reported on IRC that this broke some shadow array piglit tests with the r600g driver. Reinstating the removed cases should fix this, and still works with radeonsi as well. I will follow up with some lit tests which would have caught the regression. NOTE: This is a candidate for the Mesa stable branch. 
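Expressed as a plain predicate, the restored check is the one below; treat the meaning of the individual texture-type codes (6-8 for the basic shadow samplers, 11-13 for the shadow array/cube cases) as an assumption read off the ranges in the diff rather than a documented encoding.

#include <cassert>
#include <cstdint>

static bool isShadowTexType(uint32_t TType) {
  return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
}

int main() {
  assert(isShadowTexType(13));  // shadow array: matched again after this fix
  assert(!isShadowTexType(10)); // non-shadow types stay excluded
  return 0;
}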
Tested-by: Vincent Lejeune Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174963 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 591f66d..373a793 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -399,7 +399,7 @@ class R600_TEX inst, string opName, list pattern, def TEX_SHADOW : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || TType == 13; + return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13); }] >; -- cgit v1.1 From c8a196ae8fad3cba7a777e2e7916fd36ebf70fe6 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 12 Feb 2013 12:41:12 +0000 Subject: [ASan] Do not use kDefaultShort64bitShadowOffset on Mac, where the binaries may get mapped at 0x100000000+ and thus may interleave with the shadow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174964 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 02a8680..4950d83 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -202,6 +202,7 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize, bool ZeroBaseShadow) { llvm::Triple TargetTriple(M.getTargetTriple()); bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android; + bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX; bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64; bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; @@ -215,7 +216,7 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize, Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 : (LongSize == 32 ? kDefaultShadowOffset32 : IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64); - if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64) { + if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) { assert(LongSize == 64); Mapping.Offset = kDefaultShort64bitShadowOffset; } if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) { -- cgit v1.1 From 7eacad03efda36e09ebd96e95d7891cadaaa9087 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Tue, 12 Feb 2013 14:18:49 +0000 Subject: [NVPTX] Disable vector registers Vectors were being manually scalarized by the backend. Instead, let the target-independent code do all of the work. The manual scalarization was from a time before good target-independent support for scalarization in LLVM. However, this forces us to specially-handle vector loads and stores, which we can turn into PTX instructions that produce/consume multiple operands. 
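The idea, modeled here with standalone types rather than the SelectionDAG API, is roughly: custom lowering extracts each lane of a vector store, any-extends sub-16-bit lanes so the operand type is legal, and hands all lanes to a single StoreV2/StoreV4 node that later selects to one PTX st.v2/st.v4; loads run the same scheme in reverse. A hedged sketch of the store side, for i8 lanes:

#include <cassert>
#include <cstdint>
#include <vector>

enum Opcode { StoreV2, StoreV4 };

struct StoreNode {
  Opcode Op;
  std::vector<uint16_t> Lanes; // one operand per element, widened to >= 16 bits
};

// Model of the vector-store lowering: pick the node by element count, then
// split the value into per-lane operands.
static StoreNode lowerVectorStore(const std::vector<uint8_t> &Val) {
  assert(Val.size() == 2 || Val.size() == 4);
  StoreNode N;
  N.Op = (Val.size() == 2) ? StoreV2 : StoreV4;
  for (uint8_t Lane : Val)
    // Any-extend i8 -> i16: only the low bits matter to the store, and the
    // memory type still records the real element width.
    N.Lanes.push_back(static_cast<uint16_t>(Lane));
  return N;
}

int main() {
  StoreNode N = lowerVectorStore({1, 2, 3, 4});
  assert(N.Op == StoreV4 && N.Lanes.size() == 4);
  return 0;
}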
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174968 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/CMakeLists.txt | 1 - lib/Target/NVPTX/NVPTX.h | 1 - lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 46 +- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 809 +++++++++++++++++--- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 7 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 458 ++++++++++-- lib/Target/NVPTX/NVPTXISelLowering.h | 21 +- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 40 - lib/Target/NVPTX/NVPTXInstrInfo.td | 96 ++- lib/Target/NVPTX/NVPTXIntrinsics.td | 145 ++-- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 156 ---- lib/Target/NVPTX/NVPTXRegisterInfo.h | 4 - lib/Target/NVPTX/NVPTXRegisterInfo.td | 44 -- lib/Target/NVPTX/NVPTXSubtarget.h | 1 + lib/Target/NVPTX/NVPTXTargetMachine.cpp | 1 - lib/Target/NVPTX/VectorElementize.cpp | 1239 ------------------------------- lib/Target/NVPTX/gen-register-defs.py | 202 ----- 17 files changed, 1274 insertions(+), 1997 deletions(-) delete mode 100644 lib/Target/NVPTX/VectorElementize.cpp delete mode 100644 lib/Target/NVPTX/gen-register-defs.py (limited to 'lib') diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 7cb16b4..47baef6 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp - VectorElementize.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 097b50a..b46ea88 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); -FunctionPass *createVectorElementizePass(NVPTXTargetMachine &); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3f99d1d..0115e1f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, O << getNVPTXRegClassStr(RC) << mapped_vr; return; } - // Vector virtual register - if (getNVPTXVectorSize(RC) == 4) - O << "{" - << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_3" - << "}"; - else if (getNVPTXVectorSize(RC) == 2) - O << "{" - << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_1" - << "}"; - else - llvm_unreachable("Unsupported vector size"); + report_fatal_error("Bad register!"); } void @@ -2024,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: - case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64: - case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32: - case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8: - case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: - case NVPTX::StoreParamScalar4I32: case 
NVPTX::StoreParamScalar4I8: - case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: - case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: - case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8: - case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: - case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: - case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: - case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: - case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: - case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: - case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: - case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: - case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: - case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: - case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: - case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: @@ -2057,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: - case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: - case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: - case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: - case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: - case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: - case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: - case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: - case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: - case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: - case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: return true; } diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 36ab7f5..481f13a 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: ResNode = SelectStore(N); break; + case NVPTXISD::LoadV2: + case NVPTXISD::LoadV4: + ResNode = SelectLoadVector(N); + break; + case NVPTXISD::LDGV2: + case NVPTXISD::LDGV4: + case NVPTXISD::LDUV2: + case NVPTXISD::LDUV4: + ResNode = SelectLDGLDUVector(N); + break; + case NVPTXISD::StoreV2: + case NVPTXISD::StoreV4: + ResNode = SelectStoreVector(N); + break; + default: break; } if (ResNode) return ResNode; @@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break; - case 
MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break; default: return NULL; } SDValue Ops[] = { getI32Imm(isVolatile), @@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break; default: return NULL; } SDValue Ops[] = { getI32Imm(isVolatile), @@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { } else if (Subtarget.is64Bit()? SelectADDRri64(N1.getNode(), N1, Base, Offset): SelectADDRri(N1.getNode(), N1, Base, Offset)) { - switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break; + case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break; + case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break; + case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break; + case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break; + case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break; + default: return NULL; + } + } else { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; + case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; + case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; + case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; + case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; + case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; + default: return NULL; + } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { MVT::Other, Ops, 8); } else { - switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; - case MVT::v2i8: Opcode = 
NVPTX::LD_v2i8_areg; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break; + case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break; + case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break; + case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break; + case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break; + case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break; + default: return NULL; + } + } else { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; + case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; + case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; + case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; + case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; + case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; + default: return NULL; + } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { return NVPTXLD; } +SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { + + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Addr, Offset, Base; + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *LD; + MemSDNode *MemSD = cast(N); + EVT LoadedVT = MemSD->getMemoryVT(); + + + if (!LoadedVT.isSimple()) + return NULL; + + // Address Space Setting + unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + + // Volatile Setting + // - .volatile is only availalble for .global and .shared + bool IsVolatile = MemSD->isVolatile(); + if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + IsVolatile = false; + + // Vector Setting + MVT SimpleVT = LoadedVT.getSimpleVT(); + + // Type Setting: fromType + fromTypeWidth + // + // Sign : ISD::SEXTLOAD + // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the + // type is integer + // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned FromTypeWidth = ScalarVT.getSizeInBits(); + unsigned int FromType; + // The last operand holds the original LoadSDNode::getExtensionType() value + unsigned ExtensionType = + cast(N->getOperand(N->getNumOperands()-1))->getZExtValue(); + if (ExtensionType == ISD::SEXTLOAD) + FromType = NVPTX::PTXLdStInstCode::Signed; + else if (ScalarVT.isFloatingPoint()) + FromType = NVPTX::PTXLdStInstCode::Float; + else + FromType = NVPTX::PTXLdStInstCode::Unsigned; + + unsigned VecType; + + switch (N->getOpcode()) { + case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; + case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; + default: return NULL; + } + + EVT EltVT = N->getValueType(0); + + if (SelectDirectAddr(Op1, Addr)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; 
break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break; + } + break; + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Addr, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + } else if (Subtarget.is64Bit()? + SelectADDRsi64(Op1.getNode(), Op1, Base, Offset): + SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break; + } + break; + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Base, Offset, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + } else if (Subtarget.is64Bit()? 
+ SelectADDRri64(Op1.getNode(), Op1, Base, Offset): + SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break; + } + break; + } + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Base, Offset, Chain }; + + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return 
NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break; + } + break; + } + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + } + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast(N)->getMemOperand(); + cast(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return LD; +} + +SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { + + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *LD; + + EVT RetVT = N->getValueType(0); + + // Select opcode + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LDGV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break; + } + break; + case NVPTXISD::LDGV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break; + } + break; + case NVPTXISD::LDUV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break; + } + break; + case NVPTXISD::LDUV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LDGV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break; + } + break; + case NVPTXISD::LDGV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break; + case MVT::i16: Opcode = 
NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break; + } + break; + case NVPTXISD::LDUV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break; + } + break; + case NVPTXISD::LDUV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break; + } + break; + } + } + + SDValue Ops[] = { Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2); + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast(N)->getMemOperand(); + cast(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return LD; +} + + SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast(N); @@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break; default: return NULL; } SDValue Ops[] = { N1, @@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break; default: return NULL; } SDValue Ops[] = { N1, @@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { } else if (Subtarget.is64Bit()? 
SelectADDRri64(N2.getNode(), N2, Base, Offset): SelectADDRri(N2.getNode(), N2, Base, Offset)) { - switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break; + case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break; + case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break; + case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break; + case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break; + case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break; + default: return NULL; + } + } else { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; + case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; + case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; + case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; + case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; + case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; + default: return NULL; + } } SDValue Ops[] = { N1, getI32Imm(isVolatile), @@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { - switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break; + case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break; + case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break; + case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break; + case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break; + case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break; + default: return NULL; + } + } else { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; + case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; + case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; + case MVT::i64: Opcode = 
NVPTX::ST_i64_areg; break; + case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; + case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; + default: return NULL; + } } SDValue Ops[] = { N1, getI32Imm(isVolatile), @@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { return NVPTXST; } +SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Addr, Offset, Base; + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *ST; + EVT EltVT = Op1.getValueType(); + MemSDNode *MemSD = cast(N); + EVT StoreVT = MemSD->getMemoryVT(); + + // Address Space Setting + unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + + if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { + report_fatal_error("Cannot store to pointer that points to constant " + "memory space"); + } + + // Volatile Setting + // - .volatile is only availalble for .global and .shared + bool IsVolatile = MemSD->isVolatile(); + if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + IsVolatile = false; + + // Type Setting: toType + toTypeWidth + // - for integer type, always use 'u' + assert(StoreVT.isSimple() && "Store value is not simple"); + MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); + unsigned ToTypeWidth = ScalarVT.getSizeInBits(); + unsigned ToType; + if (ScalarVT.isFloatingPoint()) + ToType = NVPTX::PTXLdStInstCode::Float; + else + ToType = NVPTX::PTXLdStInstCode::Unsigned; + + + SmallVector StOps; + SDValue N2; + unsigned VecType; + + switch (N->getOpcode()) { + case NVPTXISD::StoreV2: + VecType = NVPTX::PTXLdStInstCode::V2; + StOps.push_back(N->getOperand(1)); + StOps.push_back(N->getOperand(2)); + N2 = N->getOperand(3); + break; + case NVPTXISD::StoreV4: + VecType = NVPTX::PTXLdStInstCode::V4; + StOps.push_back(N->getOperand(1)); + StOps.push_back(N->getOperand(2)); + StOps.push_back(N->getOperand(3)); + StOps.push_back(N->getOperand(4)); + N2 = N->getOperand(5); + break; + default: return NULL; + } + + StOps.push_back(getI32Imm(IsVolatile)); + StOps.push_back(getI32Imm(CodeAddrSpace)); + StOps.push_back(getI32Imm(VecType)); + StOps.push_back(getI32Imm(ToType)); + StOps.push_back(getI32Imm(ToTypeWidth)); + + if (SelectDirectAddr(N2, Addr)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break; + } + break; + } + StOps.push_back(Addr); + } else if (Subtarget.is64Bit()? 
+ SelectADDRsi64(N2.getNode(), N2, Base, Offset): + SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break; + } + break; + } + StOps.push_back(Base); + StOps.push_back(Offset); + } else if (Subtarget.is64Bit()? + SelectADDRri64(N2.getNode(), N2, Base, Offset): + SelectADDRri(N2.getNode(), N2, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break; + } + break; + } + } + StOps.push_back(Base); + StOps.push_back(Offset); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = 
NVPTX::STV_i8_v4_areg_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break; + } + break; + } + } + StOps.push_back(N2); + } + + StOps.push_back(Chain); + + ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size()); + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast(N)->getMemOperand(); + cast(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return ST; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 14f2091..4ec9241 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -72,8 +72,11 @@ private: #include "NVPTXGenDAGISel.inc" SDNode *Select(SDNode *N); - SDNode* SelectLoad(SDNode *N); - SDNode* SelectStore(SDNode *N); + SDNode *SelectLoad(SDNode *N); + SDNode *SelectLoadVector(SDNode *N); + SDNode *SelectLDGLDUVector(SDNode *N); + SDNode *SelectStore(SDNode *N); + SDNode *SelectStoreVector(SDNode *N); inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 2699cea..9ba2a1d 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -45,15 +45,27 @@ using namespace llvm; static unsigned int uniqueCallSite = 0; static cl::opt -RetainVectorOperands("nvptx-codegen-vectors", - cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"), - cl::init(true)); - -static cl::opt sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); +static bool IsPTXVectorType(MVT VT) { + switch (VT.SimpleTy) { + default: return false; + case MVT::v2i8: + case MVT::v4i8: + case MVT::v2i16: + case MVT::v4i16: + case MVT::v2i32: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v4f32: + case MVT::v2f64: + return true; + } +} + // NVPTXTargetLowering Constructor. 
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) : TargetLowering(TM, new NVPTXTargetObjectFile()), @@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - if (RetainVectorOperands) { - addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass); - addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass); - addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass); - addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass); - addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass); - addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass); - addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass); - addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass); - addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass); - addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom); - - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom); - } - // Operations not directly supported by NVPTX. setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::Other, Expand); @@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // TRAP can be lowered to PTX trap setOperationAction(ISD::TRAP, MVT::Other, Legal); - // By default, CONCAT_VECTORS is implemented via store/load - // through stack. It is slow and uses local memory. We need - // to custom-lowering them. 
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom); - - // Expand vector int to float and float to int conversions - // - For SINT_TO_FP and UINT_TO_FP, the src type - // (Node->getOperand(0).getValueType()) - // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT, - // the dest type (Node->getValueType(0)) is used. - // - // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector - // case, and - // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case. - // - // That is why v4i32 or v2i32 are used here. - // - // The expansion for vectors happens in VectorLegalizer::LegalizeOp() - // (LegalizeVectorOps.cpp). - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + // Register custom handling for vector loads/stores + for (int i = MVT::FIRST_VECTOR_VALUETYPE; + i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (IsPTXVectorType(VT)) { + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); + } + } // Now deduce the information based on the above mentioned // actions @@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; } } @@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { } +SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::i1) + return LowerLOADi1(Op, DAG); + else + return SDValue(); +} + // v = ld i1* addr // => // v1 = ld i8* addr // v = trunc v1 to i1 SDValue NVPTXTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) const { +LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast(Node); DebugLoc dl = Node->getDebugLoc(); @@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, dl); } +SDValue NVPTXTargetLowering::LowerSTORE(SDValue 
Op, SelectionDAG &DAG) const { + EVT ValVT = Op.getOperand(1).getValueType(); + if (ValVT == MVT::i1) + return LowerSTOREi1(Op, DAG); + else if (ValVT.isVector()) + return LowerSTOREVector(Op, DAG); + else + return SDValue(); +} + +SDValue +NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDValue Val = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT ValVT = Val.getValueType(); + + if (ValVT.isVector()) { + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 stores of <2 x double> here + // but I'm leaving that as a TODO for now. + if (!ValVT.isSimple()) + return SDValue(); + switch (ValVT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f32: + // This is a "native" vector type + break; + } + + unsigned Opcode = 0; + EVT EltVT = ValVT.getVectorElementType(); + unsigned NumElts = ValVT.getVectorNumElements(); + + // Since StoreV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // stored type to i16 and propagate the "real" type as the memory type. + bool NeedExt = false; + if (EltVT.getSizeInBits() < 16) + NeedExt = true; + + switch (NumElts) { + default: return SDValue(); + case 2: + Opcode = NVPTXISD::StoreV2; + break; + case 4: { + Opcode = NVPTXISD::StoreV4; + break; + } + } + + SmallVector Ops; + + // First is the chain + Ops.push_back(N->getOperand(0)); + + // Then the split values + for (unsigned i = 0; i < NumElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, + DAG.getIntPtrConstant(i)); + if (NeedExt) + // ANY_EXTEND is correct here since the store will only look at the + // lower-order bits anyway. + ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); + Ops.push_back(ExtVal); + } + + // Then any remaining arguments + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { + Ops.push_back(N->getOperand(i)); + } + + MemSDNode *MemSD = cast(N); + + SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL, + DAG.getVTList(MVT::Other), &Ops[0], + Ops.size(), MemSD->getMemoryVT(), + MemSD->getMemOperand()); + + return NewSt; + } + + return SDValue(); +} + // st i1 v, addr // => // v1 = zxt v to i8 // st i8, addr SDValue NVPTXTargetLowering:: -LowerSTORE(SDValue Op, SelectionDAG &DAG) const { +LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); StoreSDNode *ST = cast(Node); @@ -1348,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; } + +/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads. +static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl& Results) { + EVT ResVT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + assert(ResVT.isVector() && "Vector load must have vector type"); + + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 loads of <2 x double> here + // but I'm leaving that as a TODO for now.
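LowerSTOREVector above reduces one vector store to per-element EXTRACT_VECTOR_ELT values feeding a single StoreV2/StoreV4 target node, widening sub-16-bit elements with ANY_EXTEND while the narrow type travels along as the memory VT. A minimal, dependency-free C++ sketch of just those two decisions follows; VecTy, isNativeStoreVector and needsExtendToI16 are invented illustrative names, not anything from this patch.

#include <cstdio>

// Illustrative stand-in for the vector MVTs handled above; not an LLVM type.
struct VecTy { unsigned NumElts; unsigned EltBits; };

// Mirrors the switch over v2i8..v4f32: only 2- and 4-element vectors are
// handled, and 4 x 64-bit (v4i64/v4f64) is rejected, matching what the
// PTX st.v2/st.v4 forms can express.
static bool isNativeStoreVector(VecTy VT) {
  if (VT.NumElts != 2 && VT.NumElts != 4)
    return false;
  if (VT.NumElts == 4 && VT.EltBits == 64)
    return false;
  return VT.EltBits == 8 || VT.EltBits == 16 ||
         VT.EltBits == 32 || VT.EltBits == 64;
}

// i1/i8 elements travel in i16 registers (the ANY_EXTEND above); the real
// width is kept as the memory VT, so an st.v4.u8 still writes 8 bits each.
static bool needsExtendToI16(VecTy VT) { return VT.EltBits < 16; }

int main() {
  const VecTy Tests[] = { { 4, 8 }, { 2, 64 }, { 4, 64 }, { 3, 32 } };
  for (unsigned i = 0; i != sizeof(Tests) / sizeof(Tests[0]); ++i)
    std::printf("v%ux%ub: native=%d widen=%d\n", Tests[i].NumElts,
                Tests[i].EltBits, isNativeStoreVector(Tests[i]),
                needsExtendToI16(Tests[i]));
  return 0;
}

ANY_EXTEND is the cheap choice for the widening because, as the comment in the patch notes, a store of N bits only ever reads the low N bits of the widened register.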
+ assert(ResVT.isSimple() && "Can only handle simple types"); + switch (ResVT.getSimpleVT().SimpleTy) { + default: return; + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f32: + // This is a "native" vector type + break; + } + + EVT EltVT = ResVT.getVectorElementType(); + unsigned NumElts = ResVT.getVectorNumElements(); + + // Since LoadV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: return; + case 2: + Opcode = NVPTXISD::LoadV2; + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + Opcode = NVPTXISD::LoadV4; + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs, 5); + break; + } + } + + SmallVector OtherOps; + + // Copy regular operands + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OtherOps.push_back(N->getOperand(i)); + + LoadSDNode *LD = cast(N); + + // The select routine does not have access to the LoadSDNode instance, so + // pass along the extension information + OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], + OtherOps.size(), LD->getMemoryVT(), + LD->getMemOperand()); + + SmallVector ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); +} + +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, + SelectionDAG &DAG, + SmallVectorImpl &Results) { + SDValue Chain = N->getOperand(0); + SDValue Intrin = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Get the intrinsic ID + unsigned IntrinNo = cast(Intrin.getNode())->getZExtValue(); + switch(IntrinNo) { + default: return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: { + EVT ResVT = N->getValueType(0); + + if (ResVT.isVector()) { + // Vector LDG/LDU + + unsigned NumElts = ResVT.getVectorNumElements(); + EVT EltVT = ResVT.getVectorElementType(); + + // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type.
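ReplaceLoadVector above leans on a result-numbering convention: the LoadV2/LoadV4 node carries its NumElts scalar values as results 0 through NumElts-1 and the chain as result NumElts, and sub-16-bit elements come back widened to i16 and must be truncated before the BUILD_VECTOR. The vector ldg/ldu lowering that continues below reuses the same convention. A dependency-free sketch of the truncate-and-reassemble step, with invented byte values standing in for the loaded scalars:

#include <cstdio>
#include <vector>

int main() {
  // Pretend a LoadV4 of <4 x i8> produced four widened i16 results
  // (results 0..3); result 4 would be the chain and is not modeled here.
  // The byte values are invented for illustration.
  unsigned short LoadResults[] = { 0x00ab, 0x00cd, 0x00ef, 0x0012 };

  // The NeedTrunc path: truncate each widened result back to the original
  // element type before reassembling the vector (the BUILD_VECTOR step).
  std::vector<unsigned char> Vec;
  for (unsigned i = 0; i != 4; ++i)
    Vec.push_back(static_cast<unsigned char>(LoadResults[i])); // ISD::TRUNCATE analogue

  std::printf("<4 x i8> = <");
  for (unsigned i = 0; i != Vec.size(); ++i)
    std::printf("%s0x%02x", i ? ", " : "", static_cast<unsigned>(Vec[i]));
  std::printf(">\n");
  return 0;
}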
+ bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: return; + case 2: + switch(IntrinNo) { + default: return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV2; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV2; + break; + } + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + switch(IntrinNo) { + default: return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV4; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV4; + break; + } + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs, 5); + break; + } + } + + SmallVector OtherOps; + + // Copy regular operands + + OtherOps.push_back(Chain); // Chain + // Skip operand 1 (intrinsic ID) + // Others + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) + OtherOps.push_back(N->getOperand(i)); + + MemIntrinsicSDNode *MemSD = cast(N); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], + OtherOps.size(), MemSD->getMemoryVT(), + MemSD->getMemOperand()); + + SmallVector ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); + } else { + // i8 LDG/LDU + assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && + "Custom handling of non-i8 ldu/ldg?"); + + // Just copy all operands as-is + SmallVector Ops; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(N->getOperand(i)); + + // Force output to i16 + SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); + + MemIntrinsicSDNode *MemSD = cast(N); + + // We make sure the memory type is i8, which will be used during isel + // to select the proper instruction. 
+ SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, + LdResVTs, &Ops[0], + Ops.size(), MVT::i8, + MemSD->getMemOperand()); + + Results.push_back(NewLD.getValue(0)); + Results.push_back(NewLD.getValue(1)); + } + } + } +} + +void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + default: report_fatal_error("Unhandled custom legalization"); + case ISD::LOAD: + ReplaceLoadVector(N, DAG, Results); + return; + case ISD::INTRINSIC_W_CHAIN: + ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); + return; + } +} diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 0a1833a..95e7b55 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -58,7 +58,16 @@ enum NodeType { RETURN, CallSeqBegin, CallSeqEnd, - Dummy + Dummy, + + LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, + LoadV4, + LDGV2, // LDG.v2 + LDGV4, // LDG.v4 + LDUV2, // LDU.v2 + LDUV4, // LDU.v4 + StoreV2, + StoreV4 }; } @@ -143,8 +152,16 @@ private: SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; + + virtual void ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 6fe654cb..9e73d80 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, NVPTX::Float64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4F32RegsRegClass.contains(DestReg) && - NVPTX::V4F32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I32RegsRegClass.contains(DestReg) && - NVPTX::V4I32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2F32RegsRegClass.contains(DestReg) && - NVPTX::V2F32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I32RegsRegClass.contains(DestReg) && - NVPTX::V2I32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I8RegsRegClass.contains(DestReg) && - NVPTX::V4I8RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I8RegsRegClass.contains(DestReg) && - NVPTX::V2I8RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I16RegsRegClass.contains(DestReg) && - NVPTX::V4I16RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I16RegsRegClass.contains(DestReg) && - NVPTX::V2I16RegsRegClass.contains(SrcReg)) - 
BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I64RegsRegClass.contains(DestReg) && - NVPTX::V2I64RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2F64RegsRegClass.contains(DestReg) && - NVPTX::V2F64RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); else { llvm_unreachable("Don't know how to copy a register"); } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 8a410b8..f43abe2 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; def hasVote : Predicate<"Subtarget.hasVote()">; def hasDouble : Predicate<"Subtarget.hasDouble()">; def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; +def hasLDG : Predicate<"Subtarget.hasLDG()">; def hasLDU : Predicate<"Subtarget.hasLDU()">; def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; @@ -2153,11 +2154,21 @@ multiclass LD { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t$dst, [$addr];"), []>; + def _areg_64 : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", + " \t$dst, [$addr];"), []>; def _ari : NVPTXInst<(outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t$dst, [$addr+$offset];"), []>; + def _ari_64 : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", + " \t$dst, [$addr+$offset];"), []>; def _asi : NVPTXInst<(outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), @@ -2174,19 +2185,6 @@ defm LD_f32 : LD; defm LD_f64 : LD; } -let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in { -defm LD_v2i8 : LD; -defm LD_v4i8 : LD; -defm LD_v2i16 : LD; -defm LD_v4i16 : LD; -defm LD_v2i32 : LD; -defm LD_v4i32 : LD; -defm LD_v2f32 : LD; -defm LD_v4f32 : LD; -defm LD_v2i64 : LD; -defm LD_v2f64 : LD; -} - multiclass ST { def _avar : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, @@ -2198,11 +2196,21 @@ multiclass ST { LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", " \t[$addr], $src;"), []>; + def _areg_64 : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", + "\t[$addr], $src;"), []>; def _ari : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", " \t[$addr+$offset], $src;"), []>; + def _ari_64 : 
NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", + "\t[$addr+$offset], $src;"), []>; def _asi : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), @@ -2219,19 +2227,6 @@ defm ST_f32 : ST; defm ST_f64 : ST; } -let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in { -defm ST_v2i8 : ST; -defm ST_v4i8 : ST; -defm ST_v2i16 : ST; -defm ST_v4i16 : ST; -defm ST_v2i32 : ST; -defm ST_v4i32 : ST; -defm ST_v2f32 : ST; -defm ST_v4f32 : ST; -defm ST_v2i64 : ST; -defm ST_v2f64 : ST; -} - // The following is used only in and after vector elementizations. // Vector elementization happens at the machine instruction level, so the // following instruction @@ -2247,11 +2242,21 @@ multiclass LD_VEC { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), @@ -2269,6 +2274,12 @@ multiclass LD_VEC { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2276,6 +2287,13 @@ multiclass LD_VEC { !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), []>; + def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, 
[$addr+$offset];"), + []>; def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2304,12 +2322,23 @@ multiclass ST_VEC { LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_areg_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; def _v2_ari : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v2_ari_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; def _v2_asi : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, @@ -2328,6 +2357,12 @@ multiclass ST_VEC { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_areg_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; def _v4_ari : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2335,6 +2370,13 @@ multiclass ST_VEC { !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_ari_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; def _v4_asi : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>; -include "NVPTXVector.td" - include "NVPTXIntrinsics.td" diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 028a94b..49e2568 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -// Vector ldu -multiclass VLDU_G { - def _32: NVPTXVecInst<(outs 
regclass:$result), (ins Int32Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>, - Requires<[hasLDU]>; - def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>, - Requires<[hasLDU]>; + +//----------------------------------- +// Support for ldg on sm_35 or later +//----------------------------------- + +def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{ + MemIntrinsicSDNode *M = cast(N); + return M->getMemoryVT() == MVT::i8; +}]>; + +multiclass LDG_G { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDG]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; +} + +multiclass LDG_G_NOINTRIN { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDG]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; +} + +defm INT_PTX_LDG_GLOBAL_i8 + : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>; +defm INT_PTX_LDG_GLOBAL_i16 + : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_i32 + : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_i64 + : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_f32 + : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>; +defm INT_PTX_LDG_GLOBAL_f64 + : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>; +defm INT_PTX_LDG_GLOBAL_p32 + : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>; +defm INT_PTX_LDG_GLOBAL_p64 + : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>; + +// vector + +// Elementized vector ldg +multiclass VLDG_G_ELE_V2 { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + 
(ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; } -let VecInstType=isVecLD.Value in { -defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];", - V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32, - INT_PTX_LDU_G_v2i8_ELE_64>; -defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];", - V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32, - INT_PTX_LDU_G_v4i8_ELE_64>; -defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];", - V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32, - INT_PTX_LDU_G_v2i16_ELE_64>; -defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];", - V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32, - INT_PTX_LDU_G_v4i16_ELE_64>; -defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];", - V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32, - INT_PTX_LDU_G_v2i32_ELE_64>; -defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];", - V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32, - INT_PTX_LDU_G_v4i32_ELE_64>; -defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];", - V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32, - INT_PTX_LDU_G_v2f32_ELE_64>; -defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];", - V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32, - INT_PTX_LDU_G_v4f32_ELE_64>; -defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];", - V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32, - INT_PTX_LDU_G_v2i64_ELE_64>; -defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];", - V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32, - INT_PTX_LDU_G_v2f64_ELE_64>; +multiclass VLDG_G_ELE_V4 { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; } +// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. 
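Two checks gate the ld.global.nc patterns being defined here: the subtarget predicates (hasLDU requires sm_20, and the new hasLDG requires SmVersion >= 32, per the NVPTXSubtarget.h hunk further down), and, for the 8-bit case, the ldg_i8 PatFrag above, which keys on the memory VT recorded on the memory-intrinsic node because a single nvvm_ldg_global_i intrinsic covers every integer width. A rough standalone C++ model of both checks; MemVT, matchesLdgI8 and NVPTXSubtargetModel are illustrative names only, not the patch's types.

#include <cstdio>

// Illustrative stand-in for the memory VT recorded on the memory-intrinsic
// node; not an LLVM type.
enum MemVT { MemVT_i8, MemVT_i16, MemVT_i32, MemVT_i64 };

// Mirrors the ldg_i8 PatFrag: the intrinsic ID alone is ambiguous, so the
// 8-bit pattern only fires when the recorded memory type is i8.
static bool matchesLdgI8(MemVT VT) { return VT == MemVT_i8; }

// Mirrors the subtarget predicates used by these patterns: ldu needs sm_20,
// while ld.global.nc (ldg) is gated on SmVersion >= 32.
struct NVPTXSubtargetModel {
  unsigned SmVersion;
  bool hasLDU() const { return SmVersion >= 20; }
  bool hasLDG() const { return SmVersion >= 32; }
};

int main() {
  const unsigned SMs[] = { 20, 30, 35 };
  for (unsigned i = 0; i != 3; ++i) {
    NVPTXSubtargetModel ST = { SMs[i] };
    std::printf("sm_%u: hasLDU=%d hasLDG=%d\n", SMs[i], ST.hasLDU(),
                ST.hasLDG());
  }
  std::printf("ldg_i8 fires for i8: %d, for i32: %d\n",
              matchesLdgI8(MemVT_i8), matchesLdgI8(MemVT_i32));
  return 0;
}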
+defm INT_PTX_LDG_G_v2i8_ELE + : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v2i16_ELE + : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v2i32_ELE + : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; +defm INT_PTX_LDG_G_v2f32_ELE + : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; +defm INT_PTX_LDG_G_v2i64_ELE + : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; +defm INT_PTX_LDG_G_v2f64_ELE + : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; +defm INT_PTX_LDG_G_v4i8_ELE + : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v4i16_ELE + : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v4i32_ELE + : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; +defm INT_PTX_LDG_G_v4f32_ELE + : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; multiclass NG_TO_G { diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index e0c9161..8e105b5 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; } - else if (RC == &NVPTX::V2F32RegsRegClass) { - return ".v2.f32"; - } - else if (RC == &NVPTX::V4F32RegsRegClass) { - return ".v4.f32"; - } - else if (RC == &NVPTX::V2I32RegsRegClass) { - return ".v2.s32"; - } - else if (RC == &NVPTX::V4I32RegsRegClass) { - return ".v4.s32"; - } - else if (RC == &NVPTX::V2F64RegsRegClass) { - return ".v2.f64"; - } - else if (RC == &NVPTX::V2I64RegsRegClass) { - return ".v2.s64"; - } - else if (RC == &NVPTX::V2I16RegsRegClass) { - return ".v2.s16"; - } - else if (RC == &NVPTX::V4I16RegsRegClass) { - return ".v4.s16"; - } - else if (RC == &NVPTX::V2I8RegsRegClass) { - return ".v2.s16"; - } - else if (RC == &NVPTX::V4I8RegsRegClass) { - return ".v4.s16"; - } else { return "INTERNAL"; } @@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; } - else if (RC == &NVPTX::V2F32RegsRegClass) { - return "%v2f"; - } - else if (RC == &NVPTX::V4F32RegsRegClass) { - return "%v4f"; - } - else if (RC == &NVPTX::V2I32RegsRegClass) { - return "%v2r"; - } - else if (RC == &NVPTX::V4I32RegsRegClass) { - return "%v4r"; - } - else if (RC == &NVPTX::V2F64RegsRegClass) { - return "%v2fd"; - } - else if (RC == &NVPTX::V2I64RegsRegClass) { - return "%v2rd"; - } - else if (RC == &NVPTX::V2I16RegsRegClass) { - return "%v2s"; - } - else if (RC == &NVPTX::V4I16RegsRegClass) { - return "%v4rs"; - } - else if (RC == &NVPTX::V2I8RegsRegClass) { - return "%v2rc"; - } - else if (RC == &NVPTX::V4I8RegsRegClass) { - return "%v4rc"; - } else { return "INTERNAL"; } return ""; } - -bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return true; - if (RC->getID() == 
NVPTX::V4I16RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return true; - return false; -} - -std::string getNVPTXElemClassName(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass); - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass); - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); - llvm_unreachable("Not a vector register class"); -} - -const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return (&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return (&NVPTX::Float64RegsRegClass); - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return (&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return (&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return (&NVPTX::Int64RegsRegClass); - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return (&NVPTX::Int8RegsRegClass); - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return (&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return (&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return (&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return (&NVPTX::Int8RegsRegClass); - llvm_unreachable("Not a vector register class"); -} - -int getNVPTXVectorSize(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return 4; - llvm_unreachable("Not a vector register class"); -} } NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index a3e1252..56e6289 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -81,10 +81,6 @@ public: std::string getNVPTXRegClassName (const TargetRegisterClass *RC); std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); -bool 
isNVPTXVectorRegClass (const TargetRegisterClass *RC); -std::string getNVPTXElemClassName (const TargetRegisterClass *RC); -int getNVPTXVectorSize (const TargetRegisterClass *RC); -const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC); } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index ba15825..8d100d6 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -37,9 +37,6 @@ foreach i = 0-395 in { def RL#i : NVPTXReg<"%rl"#i>; // 64-bit def F#i : NVPTXReg<"%f"#i>; // 32-bit float def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float - // Vectors - foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in - def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>; // Arguments def ia#i : NVPTXReg<"%ia"#i>; @@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>; - -class NVPTXVecRegClass regTypes, int alignment, dag regList, - NVPTXRegClass sClass, - int e, - string n> - : NVPTXRegClass -{ - NVPTXRegClass scalarClass=sClass; - int elems=e; - string name=n; -} -def V2F32Regs - : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)), - Float32Regs, 2, ".v2.f32">; -def V4F32Regs - : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)), - Float32Regs, 4, ".v4.f32">; -def V2I32Regs - : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)), - Int32Regs, 2, ".v2.u32">; -def V4I32Regs - : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)), - Int32Regs, 4, ".v4.u32">; -def V2F64Regs - : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)), - Float64Regs, 2, ".v2.f64">; -def V2I64Regs - : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)), - Int64Regs, 2, ".v2.u64">; -def V2I16Regs - : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)), - Int16Regs, 2, ".v2.u16">; -def V4I16Regs - : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)), - Int16Regs, 4, ".v4.u16">; -def V2I8Regs - : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)), - Int8Regs, 2, ".v2.u8">; -def V4I8Regs - : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)), - Int8Regs, 4, ".v4.u8">; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index e6cb7c2..beea77e 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -57,6 +57,7 @@ public: bool hasF32FTZ() const { return SmVersion >= 20; } bool hasFMAF32() const { return SmVersion >= 20; } bool hasFMAF64() const { return SmVersion >= 13; } + bool hasLDG() const { return SmVersion >= 32; } bool hasLDU() const { return SmVersion >= 20; } bool hasGenericLdSt() const { return SmVersion >= 20; } inline bool hasHWROT32() const { return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index b4e049e..cd765fa 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() { addPass(createSplitBBatBarPass()); addPass(createAllocaHoisting()); addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); - addPass(createVectorElementizePass(getNVPTXTargetMachine())); return false; } diff --git a/lib/Target/NVPTX/VectorElementize.cpp 
b/lib/Target/NVPTX/VectorElementize.cpp deleted file mode 100644 index f1b285d..0000000 --- a/lib/Target/NVPTX/VectorElementize.cpp +++ /dev/null @@ -1,1239 +0,0 @@ -//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass converts operations on vector types to operations on their -// element types. -// -// For generic binary and unary vector instructions, the conversion is simple. -// Suppose we have -// av = bv Vop cv -// where av, bv, and cv are vector virtual registers, and Vop is a vector op. -// This gets converted to the following : -// a1 = b1 Sop c1 -// a2 = b2 Sop c2 -// -// VectorToScalarMap maintains the vector vreg to scalar vreg mapping. -// For the above example, the map will look as follows: -// av => [a1, a2] -// bv => [b1, b2] -// -// In addition, initVectorInfo creates the following opcode->opcode map. -// Vop => Sop -// OtherVop => OtherSop -// ... -// -// For vector specific instructions like vecbuild, vecshuffle etc, the -// conversion is different. Look at comments near the functions with -// prefix createVec<...>. -// -//===----------------------------------------------------------------------===// - -#include "NVPTX.h" -#include "NVPTXTargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -namespace { - -class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &F); - - NVPTXTargetMachine &TM; - MachineRegisterInfo *MRI; - const NVPTXRegisterInfo *RegInfo; - const NVPTXInstrInfo *InstrInfo; - - llvm::DenseMap - RegClassMap; - llvm::DenseMap SimpleMoveMap; - - llvm::DenseMap > VectorToScalarMap; - - bool isVectorInstr(MachineInstr *); - - SmallVector getScalarRegisters(unsigned); - unsigned getScalarVersion(unsigned); - unsigned getScalarVersion(MachineInstr *); - - bool isVectorRegister(unsigned); - const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC); - unsigned numCopiesNeeded(MachineInstr *); - - void createLoadCopy(MachineFunction&, MachineInstr *, - std::vector&); - void createStoreCopy(MachineFunction&, MachineInstr *, - std::vector&); - - void createVecDest(MachineFunction&, MachineInstr *, - std::vector&); - - void createCopies(MachineFunction&, MachineInstr *, - std::vector&); - - unsigned copyProp(MachineFunction&); - unsigned removeDeadMoves(MachineFunction&); - - void elementize(MachineFunction&); - - bool isSimpleMove(MachineInstr *); - - void createVecShuffle(MachineFunction& F, MachineInstr *Instr, - std::vector& copies); - - void createVecExtract(MachineFunction& F, MachineInstr *Instr, - std::vector& copies); - - void createVecInsert(MachineFunction& F, MachineInstr *Instr, - std::vector& copies); - - 
void createVecBuild(MachineFunction& F, MachineInstr *Instr, - std::vector& copies); - -public: - - static char ID; // Pass identification, replacement for typeid - VectorElementize(NVPTXTargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) {} - - virtual const char *getPassName() const { - return "Convert LLVM vector types to their element types"; - } -}; - -char VectorElementize::ID = 1; -} - -static cl::opt -RemoveRedundantMoves("nvptx-remove-redundant-moves", - cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"), - cl::init(true)); - -#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \ - >> NVPTX::VecInstTypeShift) -#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP) -#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad) -#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore) -#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild) -#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle) -#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract) -#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert) -#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest) - -bool VectorElementize::isSimpleMove(MachineInstr *mi) { - if (mi->isCopy()) - return true; - unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask) - >> NVPTX::SimpleMoveShift; - return (TSFlags == 1); -} - -bool VectorElementize::isVectorInstr(MachineInstr *mi) { - if ((mi->getOpcode() == NVPTX::PHI) || - (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) { - MachineOperand dest = mi->getOperand(0); - return isVectorRegister(dest.getReg()); - } - return ISVECINST(mi); -} - -unsigned VectorElementize::getScalarVersion(MachineInstr *mi) { - return getScalarVersion(mi->getOpcode()); -} - -///============================================================================= -///Instr is assumed to be a vector instruction. For most vector instructions, -///the size of the destination vector register gives the number of scalar copies -///needed. For VecStore, size of getOperand(1) gives the number of scalar copies -///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the -///number of scalar copies needed. 
-///============================================================================= -unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) { - unsigned numDefs=0; - unsigned def; - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - - if (!oper.isReg()) continue; - if (!oper.isDef()) continue; - def = i; - numDefs++; - } - assert((numDefs <= 1) && "Only 0 or 1 defs supported"); - - if (numDefs == 1) { - unsigned regnum = Instr->getOperand(def).getReg(); - if (ISVECEXTRACT(Instr)) - regnum = Instr->getOperand(1).getReg(); - return getNVPTXVectorSize(MRI->getRegClass(regnum)); - } - else if (numDefs == 0) { - assert(ISVECSTORE(Instr) - && "Only 0 def instruction supported is vector store"); - - unsigned regnum = Instr->getOperand(0).getReg(); - return getNVPTXVectorSize(MRI->getRegClass(regnum)); - } - return 1; -} - -const TargetRegisterClass *VectorElementize:: -getScalarRegClass(const TargetRegisterClass *RC) { - assert(isNVPTXVectorRegClass(RC) && - "Not a vector register class"); - return getNVPTXElemClass(RC); -} - -bool VectorElementize::isVectorRegister(unsigned reg) { - const TargetRegisterClass *RC=MRI->getRegClass(reg); - return isNVPTXVectorRegClass(RC); -} - -///============================================================================= -///For every vector register 'v' that is not already in the VectorToScalarMap, -///create n scalar registers of the corresponding element type, where n -///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap. -///============================================================================= -SmallVector VectorElementize::getScalarRegisters(unsigned regnum) { - assert(isVectorRegister(regnum) && "Expecting a vector register here"); - // Create the scalar registers and put them in the map, if not already there. - if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) { - const TargetRegisterClass *vecClass = MRI->getRegClass(regnum); - const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass); - - SmallVector temp; - - for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i) - temp.push_back(MRI->createVirtualRegister(scalarClass)); - - VectorToScalarMap[regnum] = temp; - } - return VectorToScalarMap[regnum]; -} - -///============================================================================= -///For a vector load of the form -///va <= ldv2 [addr] -///the following multi output instruction is created : -///[v1, v2] <= LD [addr] -///Look at NVPTXVector.td for the definitions of multi output loads. -///============================================================================= -void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstrBuilder copy(F, copies[0]); - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - // Remove the dest, that should be a vector operand. 
- MachineOperand dest = copy->getOperand(0); - unsigned regnum = dest.getReg(); - - SmallVector scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy.addReg(scalarRegs[i], RegState::Define); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy.addOperand(otherOperands[i]); - -} - -///============================================================================= -///For a vector store of the form -///stv2 va, [addr] -///the following multi input instruction is created : -///ST v1, v2, [addr] -///Look at NVPTXVector.td for the definitions of multi input stores. -///============================================================================= -void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstrBuilder copy(F, copies[0]); - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - MachineOperand src = copy->getOperand(0); - unsigned regnum = src.getReg(); - - SmallVector scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy.addReg(scalarRegs[i]); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy.addOperand(otherOperands[i]); -} - -///============================================================================= -///va <= shufflev2 vb, vc, , -///gets converted to 2 moves into a1 and a2. The source of the moves depend on -///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For -///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move -///instructions will be -///a1 <= c2 -///a2 <= b1 -///============================================================================= -void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - unsigned src1regnum = Instr->getOperand(1).getReg(); - unsigned src2regnum = Instr->getOperand(2).getReg(); - - SmallVector dest = getScalarRegisters(destregnum); - SmallVector src1 = getScalarRegisters(src1regnum); - SmallVector src2 = getScalarRegisters(src2regnum); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; iget(getScalarVersion(Instr)), dest[i]); - MachineOperand which=Instr->getOperand(3+i); - assert(which.isImm() && "Shuffle operand not a constant"); - - int src=which.getImm(); - int elem=src%numcopies; - - if (which.getImm() < numcopies) - copy.addReg(src1[elem]); - else - copy.addReg(src2[elem]); - copies.push_back(copy); - } -} - -///============================================================================= -///a <= extractv2 va, -///gets turned into a simple move to the scalar register a. The source depends -///on i1. 
-///============================================================================= -void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - unsigned srcregnum = Instr->getOperand(1).getReg(); - - SmallVector src = getScalarRegisters(srcregnum); - - MachineOperand which = Instr->getOperand(2); - assert(which.isImm() && "Extract operand not a constant"); - - DebugLoc DL = Instr->getDebugLoc(); - copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), - Instr->getOperand(0).getReg()) - .addReg(src[which.getImm()])); -} - -///============================================================================= -///va <= vecinsertv2 vb, c, -///This instruction copies all elements of vb to va, except the 'i1'th element. -///The scalar value c becomes the 'i1'th element of va. -///This gets translated to 2 (4 for vecinsertv4) moves. -///============================================================================= -void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - unsigned srcregnum = Instr->getOperand(1).getReg(); - - SmallVector dest = getScalarRegisters(destregnum); - SmallVector src = getScalarRegisters(srcregnum); - - MachineOperand which=Instr->getOperand(3); - assert(which.isImm() && "Insert operand not a constant"); - unsigned int elem=which.getImm(); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; iget(getScalarVersion(Instr)), dest[i]); - - if (i != elem) - copy.addReg(src[i]); - else - copy.addOperand(Instr->getOperand(2)); - - copies.push_back(copy); - } - -} - -///============================================================================= -///va <= buildv2 b1, b2 -///gets translated to -///a1 <= b1 -///a2 <= b2 -///============================================================================= -void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - - SmallVector dest = getScalarRegisters(destregnum); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; iget(getScalarVersion(Instr)), - dest[i]) - .addOperand(Instr->getOperand(1+i))); -} - -///============================================================================= -///For a tex inst of the form -///va <= op [scalar operands] -///the following multi output instruction is created : -///[v1, v2] <= op' [scalar operands] -///============================================================================= -void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstrBuilder copy(F, copies[0]); - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - // Remove the dest, that should be a vector operand. 
- MachineOperand dest = copy->getOperand(0); - unsigned regnum = dest.getReg(); - - SmallVector scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy.addReg(scalarRegs[i], RegState::Define); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy.addOperand(otherOperands[i]); -} - -///============================================================================= -///Look at the vector instruction type and dispatch to the createVec<...> -///function that creates the scalar copies. -///============================================================================= -void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, - std::vector& copies) { - if (ISVECLOAD(Instr)) { - createLoadCopy(F, Instr, copies); - return; - } - if (ISVECSTORE(Instr)) { - createStoreCopy(F, Instr, copies); - return; - } - if (ISVECSHUFFLE(Instr)) { - createVecShuffle(F, Instr, copies); - return; - } - if (ISVECEXTRACT(Instr)) { - createVecExtract(F, Instr, copies); - return; - } - if (ISVECINSERT(Instr)) { - createVecInsert(F, Instr, copies); - return; - } - if (ISVECDEST(Instr)) { - createVecDest(F, Instr, copies); - return; - } - if (ISVECBUILD(Instr)) { - createVecBuild(F, Instr, copies); - return; - } - - unsigned numcopies=numCopiesNeeded(Instr); - - for (unsigned i=0; i allOperands; - std::vector isDef; - - for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) { - MachineOperand oper = copy->getOperand(j); - allOperands.push_back(oper); - if (oper.isReg()) - isDef.push_back(oper.isDef()); - else - isDef.push_back(false); - } - - for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) - copy->RemoveOperand(0); - - copy->setDesc(InstrInfo->get(getScalarVersion(Instr))); - - for (unsigned j=0, e=allOperands.size(); j!=e; ++j) { - MachineOperand oper=allOperands[j]; - if (oper.isReg()) { - unsigned regnum = oper.getReg(); - if (isVectorRegister(regnum)) { - - SmallVector scalarRegs = getScalarRegisters(regnum); - copy.addReg(scalarRegs[i], getDefRegState(isDef[j])); - } - else - copy.addOperand(oper); - } - else - copy.addOperand(oper); - } - } -} - -///============================================================================= -///Scan through all basic blocks, looking for vector instructions. -///For each vector instruction I, insert the scalar copies before I, and -///add I into toRemove vector. Finally remove all instructions in toRemove. 
-///============================================================================= -void VectorElementize::elementize(MachineFunction &F) { - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); - BI!=BE; ++BI) { - MachineBasicBlock *BB = &*BI; - - std::vector copies; - std::vector toRemove; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); - II!=IE; ++II) { - MachineInstr *Instr = &*II; - - if (!isVectorInstr(Instr)) - continue; - - copies.clear(); - createCopies(F, Instr, copies); - for (unsigned i=0, e=copies.size(); i!=e; ++i) - BB->insert(II, copies[i]); - - assert((copies.size() > 0) && "Problem in createCopies"); - toRemove.push_back(Instr); - } - for (unsigned i=0, e=toRemove.size(); i!=e; ++i) - F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i])); - } -} - -///============================================================================= -///a <= b -///... -///... -///x <= op(a, ...) -///gets converted to -/// -///x <= op(b, ...) -///The original move is still present. This works on SSA form machine code. -///Note that a <= b should be a simple vreg-to-vreg move instruction. -///TBD : I didn't find a function that can do replaceOperand, so I remove -///all operands and add all of them again, replacing the one while adding. -///============================================================================= -unsigned VectorElementize::copyProp(MachineFunction &F) { - unsigned numReplacements = 0; - - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; - ++BI) { - MachineBasicBlock *BB = &*BI; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; - ++II) { - MachineInstr *Instr = &*II; - - // Don't do copy propagation on PHI as it will cause unnecessary - // live range overlap. 
- if ((Instr->getOpcode() == TargetOpcode::PHI) || - (Instr->getOpcode() == TargetOpcode::DBG_VALUE)) - continue; - - bool needsReplacement = false; - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - if (!oper.isReg()) continue; - if (oper.isDef()) continue; - if (!RegInfo->isVirtualRegister(oper.getReg())) continue; - - MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); - - if (!defInstr) continue; - - if (!isSimpleMove(defInstr)) continue; - - MachineOperand defSrc = defInstr->getOperand(1); - if (!defSrc.isReg()) continue; - if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue; - - needsReplacement = true; - - } - if (!needsReplacement) continue; - - numReplacements++; - - std::vector operands; - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - bool flag = false; - do { - if (!(oper.isReg())) - break; - if (oper.isDef()) - break; - if (!(RegInfo->isVirtualRegister(oper.getReg()))) - break; - MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); - if (!(isSimpleMove(defInstr))) - break; - MachineOperand defSrc = defInstr->getOperand(1); - if (!(defSrc.isReg())) - break; - if (!(RegInfo->isVirtualRegister(defSrc.getReg()))) - break; - operands.push_back(defSrc); - flag = true; - } while (0); - if (flag == false) - operands.push_back(oper); - } - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) - Instr->RemoveOperand(0); - for (unsigned i=0, e=operands.size(); i!=e; ++i) - Instr->addOperand(F, operands[i]); - - } - } - return numReplacements; -} - -///============================================================================= -///Look for simple vreg-to-vreg instructions whose use_empty() is true, add -///them to deadMoves vector. Then remove all instructions in deadMoves. -///============================================================================= -unsigned VectorElementize::removeDeadMoves(MachineFunction &F) { - std::vector deadMoves; - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; - ++BI) { - MachineBasicBlock *BB = &*BI; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; - ++II) { - MachineInstr *Instr = &*II; - - if (!isSimpleMove(Instr)) continue; - - MachineOperand dest = Instr->getOperand(0); - assert(dest.isReg() && "dest of move not a register"); - assert(RegInfo->isVirtualRegister(dest.getReg()) && - "dest of move not a virtual register"); - - if (MRI->use_empty(dest.getReg())) { - deadMoves.push_back(Instr); - } - } - } - - for (unsigned i=0, e=deadMoves.size(); i!=e; ++i) - F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i])); - - return deadMoves.size(); -} - -///============================================================================= -///Main function for this pass. 
-///============================================================================= -bool VectorElementize::runOnMachineFunction(MachineFunction &F) { - MRI = &F.getRegInfo(); - - RegInfo = TM.getRegisterInfo(); - InstrInfo = TM.getInstrInfo(); - - VectorToScalarMap.clear(); - - elementize(F); - - if (RemoveRedundantMoves) - while (1) { - if (copyProp(F) == 0) break; - removeDeadMoves(F); - } - - return true; -} - -FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) { - return new VectorElementize(tm); -} - -unsigned VectorElementize::getScalarVersion(unsigned opcode) { - if (opcode == NVPTX::PHI) - return opcode; - if (opcode == NVPTX::IMPLICIT_DEF) - return opcode; - switch(opcode) { - default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td"); - case TargetOpcode::COPY: return TargetOpcode::COPY; - case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr; - case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr; - case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr; - case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr; - case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr; - case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr; - case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr; - case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr; - case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr; - case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr; - case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr; - case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr; - case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr; - case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr; - case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32; - case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32; - case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32; - case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32; - case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32; - case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr; - case NVPTX::F32MADV2: return NVPTX::FMAD32rrr; - case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr; - case NVPTX::F32MADV4: return NVPTX::FMAD32rrr; - case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr; - case NVPTX::F32FMAV2: return NVPTX::FMA32rrr; - case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr; - case NVPTX::F32FMAV4: return NVPTX::FMA32rrr; - case NVPTX::F64FMAV2: return NVPTX::FMA64rrr; - case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32; - case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64; - case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32; - case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32; - case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64; - case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32; - case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32; - case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64; - case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32; - case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32; - case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64; - case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32; - case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32; - case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64; - case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32; - case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32; - case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64; - case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32; - case NVPTX::FVecNEV2F32: return 
NVPTX::FSetNEf32rr_toi32; - case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64; - case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32; - case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32; - case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64; - case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32; - case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32; - case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64; - case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32; - case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32; - case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64; - case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32; - case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32; - case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64; - case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32; - case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32; - case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64; - case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32; - case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32; - case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64; - case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32; - case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32; - case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64; - case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32; - case NVPTX::I16MADV2: return NVPTX::MAD16rrr; - case NVPTX::I16MADV4: return NVPTX::MAD16rrr; - case NVPTX::I32MADV2: return NVPTX::MAD32rrr; - case NVPTX::I32MADV4: return NVPTX::MAD32rrr; - case NVPTX::I64MADV2: return NVPTX::MAD64rrr; - case NVPTX::I8MADV2: return NVPTX::MAD8rrr; - case NVPTX::I8MADV4: return NVPTX::MAD8rrr; - case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr; - case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr; - case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr; - case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr; - case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr; - case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr; - case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr; - case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr; - case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr; - case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr; - case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr; - case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr; - case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr; - case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr; - case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr; - case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr; - case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr; - case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr; - case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr; - case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr; - case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr; - case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr; - case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr; - case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr; - case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr; - case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; - case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec; - case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz; - case NVPTX::V2F32Div: return NVPTX::FDIV32rr; - case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr; - case NVPTX::V2F64Div: return NVPTX::FDIV64rr; - case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr; - case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr; - case NVPTX::V2I32_Select: return 
NVPTX::SELECTi32rr; - case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr; - case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr; - case NVPTX::V2f32Extract: return NVPTX::FMOV32rr; - case NVPTX::V2f32Insert: return NVPTX::FMOV32rr; - case NVPTX::V2f32Mov: return NVPTX::FMOV32rr; - case NVPTX::V2f64Extract: return NVPTX::FMOV64rr; - case NVPTX::V2f64Insert: return NVPTX::FMOV64rr; - case NVPTX::V2f64Mov: return NVPTX::FMOV64rr; - case NVPTX::V2i16Extract: return NVPTX::IMOV16rr; - case NVPTX::V2i16Insert: return NVPTX::IMOV16rr; - case NVPTX::V2i16Mov: return NVPTX::IMOV16rr; - case NVPTX::V2i32Extract: return NVPTX::IMOV32rr; - case NVPTX::V2i32Insert: return NVPTX::IMOV32rr; - case NVPTX::V2i32Mov: return NVPTX::IMOV32rr; - case NVPTX::V2i64Extract: return NVPTX::IMOV64rr; - case NVPTX::V2i64Insert: return NVPTX::IMOV64rr; - case NVPTX::V2i64Mov: return NVPTX::IMOV64rr; - case NVPTX::V2i8Extract: return NVPTX::IMOV8rr; - case NVPTX::V2i8Insert: return NVPTX::IMOV8rr; - case NVPTX::V2i8Mov: return NVPTX::IMOV8rr; - case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; - case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec; - case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz; - case NVPTX::V4F32Div: return NVPTX::FDIV32rr; - case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr; - case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr; - case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr; - case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr; - case NVPTX::V4f32Extract: return NVPTX::FMOV32rr; - case NVPTX::V4f32Insert: return NVPTX::FMOV32rr; - case NVPTX::V4f32Mov: return NVPTX::FMOV32rr; - case NVPTX::V4i16Extract: return NVPTX::IMOV16rr; - case NVPTX::V4i16Insert: return NVPTX::IMOV16rr; - case NVPTX::V4i16Mov: return NVPTX::IMOV16rr; - case NVPTX::V4i32Extract: return NVPTX::IMOV32rr; - case NVPTX::V4i32Insert: return NVPTX::IMOV32rr; - case NVPTX::V4i32Mov: return NVPTX::IMOV32rr; - case NVPTX::V4i8Extract: return NVPTX::IMOV8rr; - case NVPTX::V4i8Insert: return NVPTX::IMOV8rr; - case NVPTX::V4i8Mov: return NVPTX::IMOV8rr; - case NVPTX::VAddV2I16: return NVPTX::ADDi16rr; - case NVPTX::VAddV2I32: return NVPTX::ADDi32rr; - case NVPTX::VAddV2I64: return NVPTX::ADDi64rr; - case NVPTX::VAddV2I8: return NVPTX::ADDi8rr; - case NVPTX::VAddV4I16: return NVPTX::ADDi16rr; - case NVPTX::VAddV4I32: return NVPTX::ADDi32rr; - case NVPTX::VAddV4I8: return NVPTX::ADDi8rr; - case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr; - case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz; - case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr; - case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr; - case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz; - case NVPTX::VAndV2I16: return NVPTX::ANDb16rr; - case NVPTX::VAndV2I32: return NVPTX::ANDb32rr; - case NVPTX::VAndV2I64: return NVPTX::ANDb64rr; - case NVPTX::VAndV2I8: return NVPTX::ANDb8rr; - case NVPTX::VAndV4I16: return NVPTX::ANDb16rr; - case NVPTX::VAndV4I32: return NVPTX::ANDb32rr; - case NVPTX::VAndV4I8: return NVPTX::ANDb8rr; - case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz; - case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr; - case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr; - case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz; - case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr; - case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr; - case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr; - case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr; - case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr; - case NVPTX::VMultHSV4I16: return 
NVPTX::MULTHSi16rr; - case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr; - case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr; - case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr; - case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr; - case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr; - case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr; - case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr; - case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr; - case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr; - case NVPTX::VMultV2I16: return NVPTX::MULTi16rr; - case NVPTX::VMultV2I32: return NVPTX::MULTi32rr; - case NVPTX::VMultV2I64: return NVPTX::MULTi64rr; - case NVPTX::VMultV2I8: return NVPTX::MULTi8rr; - case NVPTX::VMultV4I16: return NVPTX::MULTi16rr; - case NVPTX::VMultV4I32: return NVPTX::MULTi32rr; - case NVPTX::VMultV4I8: return NVPTX::MULTi8rr; - case NVPTX::VNegV2I16: return NVPTX::INEG16; - case NVPTX::VNegV2I32: return NVPTX::INEG32; - case NVPTX::VNegV2I64: return NVPTX::INEG64; - case NVPTX::VNegV2I8: return NVPTX::INEG8; - case NVPTX::VNegV4I16: return NVPTX::INEG16; - case NVPTX::VNegV4I32: return NVPTX::INEG32; - case NVPTX::VNegV4I8: return NVPTX::INEG8; - case NVPTX::VNegv2f32: return NVPTX::FNEGf32; - case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz; - case NVPTX::VNegv2f64: return NVPTX::FNEGf64; - case NVPTX::VNegv4f32: return NVPTX::FNEGf32; - case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz; - case NVPTX::VNotV2I16: return NVPTX::NOT16; - case NVPTX::VNotV2I32: return NVPTX::NOT32; - case NVPTX::VNotV2I64: return NVPTX::NOT64; - case NVPTX::VNotV2I8: return NVPTX::NOT8; - case NVPTX::VNotV4I16: return NVPTX::NOT16; - case NVPTX::VNotV4I32: return NVPTX::NOT32; - case NVPTX::VNotV4I8: return NVPTX::NOT8; - case NVPTX::VOrV2I16: return NVPTX::ORb16rr; - case NVPTX::VOrV2I32: return NVPTX::ORb32rr; - case NVPTX::VOrV2I64: return NVPTX::ORb64rr; - case NVPTX::VOrV2I8: return NVPTX::ORb8rr; - case NVPTX::VOrV4I16: return NVPTX::ORb16rr; - case NVPTX::VOrV4I32: return NVPTX::ORb32rr; - case NVPTX::VOrV4I8: return NVPTX::ORb8rr; - case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr; - case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr; - case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr; - case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr; - case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr; - case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr; - case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr; - case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr; - case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr; - case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr; - case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr; - case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr; - case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr; - case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr; - case NVPTX::VSubV2I16: return NVPTX::SUBi16rr; - case NVPTX::VSubV2I32: return NVPTX::SUBi32rr; - case NVPTX::VSubV2I64: return NVPTX::SUBi64rr; - case NVPTX::VSubV2I8: return NVPTX::SUBi8rr; - case NVPTX::VSubV4I16: return NVPTX::SUBi16rr; - case NVPTX::VSubV4I32: return NVPTX::SUBi32rr; - case NVPTX::VSubV4I8: return NVPTX::SUBi8rr; - case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz; - case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr; - case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr; - case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz; - case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr; - case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr; - case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr; - case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr; 
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr; - case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr; - case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr; - case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr; - case NVPTX::VURemV2I16: return NVPTX::UREMi16rr; - case NVPTX::VURemV2I32: return NVPTX::UREMi32rr; - case NVPTX::VURemV2I64: return NVPTX::UREMi64rr; - case NVPTX::VURemV2I8: return NVPTX::UREMi8rr; - case NVPTX::VURemV4I16: return NVPTX::UREMi16rr; - case NVPTX::VURemV4I32: return NVPTX::UREMi32rr; - case NVPTX::VURemV4I8: return NVPTX::UREMi8rr; - case NVPTX::VXorV2I16: return NVPTX::XORb16rr; - case NVPTX::VXorV2I32: return NVPTX::XORb32rr; - case NVPTX::VXorV2I64: return NVPTX::XORb64rr; - case NVPTX::VXorV2I8: return NVPTX::XORb8rr; - case NVPTX::VXorV4I16: return NVPTX::XORb16rr; - case NVPTX::VXorV4I32: return NVPTX::XORb32rr; - case NVPTX::VXorV4I8: return NVPTX::XORb8rr; - case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16; - case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32; - case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64; - case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8; - case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16; - case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32; - case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8; - case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16; - case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32; - case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64; - case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8; - case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16; - case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32; - case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8; - case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16; - case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32; - case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64; - case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8; - case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16; - case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32; - case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8; - case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16; - case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32; - case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64; - case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8; - case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16; - case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32; - case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8; - case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16; - case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32; - case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64; - case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8; - case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16; - case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32; - case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8; - case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16; - case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32; - case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64; - case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8; - case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16; - case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32; - case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8; - case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr; - case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr; - case 
NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr; - case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr; - case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr; - case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr; - case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr; - case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr; - case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr; - case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr; - case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16; - case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32; - case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64; - case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8; - case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16; - case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32; - case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8; - case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16; - case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32; - case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64; - case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8; - case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16; - case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32; - case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8; - case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16; - case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32; - case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64; - case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8; - case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16; - case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32; - case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8; - case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16; - case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32; - case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64; - case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8; - case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16; - case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32; - case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8; - case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16; - case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32; - case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64; - case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8; - case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16; - case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32; - case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8; - case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16; - case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32; - case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64; - case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8; - case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16; - case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32; - case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8; - case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; - case 
NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; - - case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32; - case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16; - case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8; - case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64; - case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32; - case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16; - case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8; - case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32; - case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32; - case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64; - case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32; - case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16; - case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8; - case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64; - case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32; - case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16; - case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8; - case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32; - case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32; - case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64; - case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32; - case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16; - case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8; - case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64; - case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32; - case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16; - case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8; - case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32; - case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32; - case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64; - case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8; - case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16; - case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8; - case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16; - case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32; - case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32; - - case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar; - case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg; - case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari; - case 
NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi; - case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar; - case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg; - case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari; - case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi; - - case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar; - case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg; - case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari; - case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi; - case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar; - case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg; - case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari; - case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi; - - case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar; - case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg; - case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari; - case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi; - case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar; - case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg; - case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari; - case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi; - - case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar; - case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg; - case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari; - case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi; - case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar; - case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg; - case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari; - case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi; - - case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar; - case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg; - case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari; - case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi; - case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar; - case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg; - case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari; - case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi; - - case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar; - case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg; - case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari; - case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi; - case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar; - case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg; - case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari; - case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi; - - case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar; - case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg; - case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari; - case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi; - case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar; - case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg; - case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari; - case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi; - - case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar; - case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg; - case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari; - case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi; - case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar; - case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg; - case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari; - case NVPTX::ST_v4i32_asi: 
return NVPTX::STV_i32_v4_asi; - - case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar; - case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg; - case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari; - case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi; - case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar; - case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg; - case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari; - case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi; - - case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar; - case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg; - case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari; - case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi; - case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar; - case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg; - case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari; - case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi; - } - return 0; -} diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py deleted file mode 100644 index ed06668..0000000 --- a/lib/Target/NVPTX/gen-register-defs.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python - -num_regs = 396 - -outFile = open('NVPTXRegisterInfo.td', 'w') - -outFile.write(''' -//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class NVPTXReg : Register { - let Namespace = "NVPTX"; -} - -class NVPTXRegClass regTypes, int alignment, dag regList> - : RegisterClass <"NVPTX", regTypes, alignment, regList>; - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// - -// Special Registers used as stack pointer -def VRFrame : NVPTXReg<"%SP">; -def VRFrameLocal : NVPTXReg<"%SPL">; - -// Special Registers used as the stack -def VRDepot : NVPTXReg<"%Depot">; -''') - -# Predicates -outFile.write(''' -//===--- Predicate --------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i)) - -# Int8 -outFile.write(''' -//===--- 8-bit ------------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i)) - -# Int16 -outFile.write(''' -//===--- 16-bit -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i)) - -# Int32 -outFile.write(''' -//===--- 32-bit -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i)) - -# Int64 -outFile.write(''' -//===--- 64-bit -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i)) - -# F32 -outFile.write(''' -//===--- 32-bit float 
-----------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i)) - -# F64 -outFile.write(''' -//===--- 64-bit float -----------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i)) - -# Vector registers -outFile.write(''' -//===--- Vector -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i)) - -for i in range(0, num_regs): - outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i)) - -# Argument registers -outFile.write(''' -//===--- Arguments --------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i)) - -outFile.write(''' -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -''') - -outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1)) - -outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1)) - -outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1)) - -outFile.write(''' -// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. 
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-''')
-
-outFile.write('''
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-''')
-
-
-outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
-outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
-
-outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
-outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
-
-outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
-outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
-
-outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
-outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
-
-outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
-outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
-
-outFile.close()
-
-
-outFile = open('NVPTXNumRegisters.h', 'w')
-outFile.write('''
-//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NVPTX_NUM_REGISTERS_H
-#define NVPTX_NUM_REGISTERS_H
-
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = %d;
-
-}
-
-#endif
-''' % num_regs)
-
-outFile.close()
--
cgit v1.1

From 5c97450df748819381daa4c4d400c39b0d7378ae Mon Sep 17 00:00:00 2001
From: Paul Redmond
Date: Tue, 12 Feb 2013 15:21:21 +0000
Subject: PR14562 - Truncation of left shift became undef

DAGCombiner::ReduceLoadWidth was converting (trunc i32 (shl i64 v, 32))
into (shl i32 v, 32), which then folds to undef. To prevent this, check
the shift count against the final result size.
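A minimal sketch of the failure mode (editorial illustration, not taken
from the patch or from PR14562's attachments; the names are invented):

    %v = load i64* %p
    %s = shl i64 %v, 32
    %t = trunc i64 %s to i32

ReduceLoadWidth narrows the 64-bit load that feeds the truncated shift,
leaving an i32 shifted left by 32. A shift by the full bit width of the
type is undefined, so the combined node folded to undef, even though the
original expression always evaluates to 0 (the shl clears exactly the low
32 bits that the trunc keeps). The change below instead emits an explicit
zero constant whenever the shift amount is at least as wide as the result
type.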
Patch by: Kevin Schoedel Reviewed by: Nadav Rotem git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174972 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5b83149..67fa39d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5163,8 +5163,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { EVT ShImmTy = getShiftAmountTy(Result.getValueType()); if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) ShImmTy = VT; - Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + // If the shift amount is as large as the result size (but, presumably, + // no larger than the source) then the useful bits of the result are + // zero; we can't simply return the shortened shift, because the result + // of that operation is undefined. + if (ShLeftAmt >= VT.getSizeInBits()) + Result = DAG.getConstant(0, VT); + else + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } // Return the new loaded value. -- cgit v1.1 From 6b8d2026ba0b60a317fa239eacbcaeff5f2270f0 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 12 Feb 2013 16:06:23 +0000 Subject: Hexagon: Add support to generate predicated absolute addressing mode instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174973 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.cpp | 143 +++++++++++++++++++++++++++----- 1 file changed, 123 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index ee37dea..1005553 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2237,38 +2237,141 @@ PredicateInstruction(MachineInstr *MI, assert (isPredicable(MI) && "Expected predicable instruction"); bool invertJump = (!Cond.empty() && Cond[0].isImm() && (Cond[0].getImm() == 0)); + + // This will change MI's opcode to its predicate version. + // However, its operand list is still the old one, i.e. the + // non-predicate one. MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); - // - // This assumes that the predicate is always the first operand - // in the set of inputs. - // - MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); - int oper; - for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) { - MachineOperand MO = MI->getOperand(oper); - if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) { - break; - } - if (MO.isReg()) { - MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), - MO.isImplicit(), MO.isKill(), - MO.isDead(), MO.isUndef(), - MO.isDebug()); - } else if (MO.isImm()) { - MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); - } else { - llvm_unreachable("Unexpected operand type"); + int oper = -1; + unsigned int GAIdx = 0; + + // Indicates whether the current MI has a GlobalAddress operand + bool hasGAOpnd = false; + std::vector tmpOpnds; + + // Indicates whether we need to shift operands to right. + bool needShift = true; + + // The predicate is ALWAYS the FIRST input operand !!! + if (MI->getNumOperands() == 0) { + // The non-predicate version of MI does not take any operands, + // i.e. no outs and no ins. 
In this condition, the predicate + // operand will be directly placed at Operands[0]. No operand + // shift is needed. + // Example: BARRIER + needShift = false; + oper = -1; + } + else if ( MI->getOperand(MI->getNumOperands()-1).isReg() + && MI->getOperand(MI->getNumOperands()-1).isDef() + && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) { + // The non-predicate version of MI does not have any input operands. + // In this condition, we extend the length of Operands[] by one and + // copy the original last operand to the newly allocated slot. + // At this moment, it is just a place holder. Later, we will put + // predicate operand directly into it. No operand shift is needed. + // Example: r0=BARRIER (this is a faked insn used here for illustration) + MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); + needShift = false; + oper = MI->getNumOperands() - 2; + } + else { + // We need to right shift all input operands by one. Duplicate the + // last operand into the newly allocated slot. + MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); + } + + if (needShift) + { + // Operands[ MI->getNumOperands() - 2 ] has been copied into + // Operands[ MI->getNumOperands() - 1 ], so we start from + // Operands[ MI->getNumOperands() - 3 ]. + // oper is a signed int. + // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1. + for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) + { + MachineOperand &MO = MI->getOperand(oper); + + // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7] + // + // /\~ + // /||\~ + // || + // Predicate Operand here + if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) { + break; + } + if (MO.isReg()) { + MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), + MO.isImplicit(), MO.isKill(), + MO.isDead(), MO.isUndef(), + MO.isDebug()); + } + else if (MO.isImm()) { + MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); + } + else if (MO.isGlobal()) { + // MI can not have more than one GlobalAddress operand. + assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd"); + + // There is no member function called "ChangeToGlobalAddress" in the + // MachineOperand class (not like "ChangeToRegister" and + // "ChangeToImmediate"). So we have to remove them from Operands[] list + // first, and then add them back after we have inserted the predicate + // operand. tmpOpnds[] is to remember these operands before we remove + // them. + tmpOpnds.push_back(MO); + + // Operands[oper] is a GlobalAddress operand; + // Operands[oper+1] has been copied into Operands[oper+2]; + hasGAOpnd = true; + GAIdx = oper; + continue; + } + else { + assert(false && "Unexpected operand type"); + } } } int regPos = invertJump ? 1 : 0; MachineOperand PredMO = Cond[regPos]; + + // [oper] now points to the last explicit Def. Predicate operand must be + // located at [oper+1]. See diagram above. + // This assumes that the predicate is always the first operand, + // i.e. Operands[0+numResults], in the set of inputs + // It is better to have an assert here to check this. But I don't know how + // to write this assert because findFirstPredOperandIdx() would return -1 + if (oper < -1) oper = -1; MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), PredMO.isImplicit(), PredMO.isKill(), PredMO.isDead(), PredMO.isUndef(), PredMO.isDebug()); + if (hasGAOpnd) + { + unsigned int i; + + // Operands[GAIdx] is the original GlobalAddress operand, which is + // already copied into tmpOpnds[0]. 
+ // Operands[GAIdx] now stores a copy of Operands[GAIdx-1] + // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2], + // so we start from [GAIdx+2] + for (i = GAIdx + 2; i < MI->getNumOperands(); ++i) + tmpOpnds.push_back(MI->getOperand(i)); + + // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ] + // It is very important that we always remove from the end of Operands[] + // MI->getNumOperands() is at least 2 if program goes to here. + for (i = MI->getNumOperands() - 1; i > GAIdx; --i) + MI->RemoveOperand(i); + + for (i = 0; i < tmpOpnds.size(); ++i) + MI->addOperand(tmpOpnds[i]); + } + return true; } -- cgit v1.1 From e38825f490b898644089d5cd9cb90cec681bded8 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 12 Feb 2013 16:20:28 +0000 Subject: Add support for the pubnames section to llvm-dwarfdump. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174976 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFContext.cpp | 20 ++++++++++++++++++++ lib/DebugInfo/DWARFContext.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index d061f4e..9e19310 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -88,6 +88,24 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { rangeList.dump(OS); } + if (DumpType == DIDT_All || DumpType == DIDT_Pubnames) { + OS << "\n.debug_pubnames contents:\n"; + DataExtractor pubNames(getPubNamesSection(), isLittleEndian(), 0); + offset = 0; + OS << "Length: " << pubNames.getU32(&offset) << "\n"; + OS << "Version: " << pubNames.getU16(&offset) << "\n"; + OS << "Offset in .debug_info: " << pubNames.getU32(&offset) << "\n"; + OS << "Size: " << pubNames.getU32(&offset) << "\n"; + OS << "\n Offset Name\n"; + while (offset < getPubNamesSection().size()) { + uint32_t n = pubNames.getU32(&offset); + if (n == 0) + break; + OS << format("%8x ", n); + OS << pubNames.getCStr(&offset) << "\n"; + } + } + if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) { OS << "\n.debug_abbrev.dwo contents:\n"; getDebugAbbrevDWO()->dump(OS); @@ -494,6 +512,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : RangeDWOSection = data; RangeSection = data; } + else if (name == "debug_pubnames") + PubNamesSection = data; else if (name == "debug_info.dwo") InfoDWOSection = data; else if (name == "debug_abbrev.dwo") diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index f12a054..37b2729 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -111,6 +111,7 @@ public: virtual StringRef getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; + virtual StringRef getPubNamesSection() = 0; // Sections for DWARF5 split dwarf proposal. virtual StringRef getInfoDWOSection() = 0; @@ -149,6 +150,7 @@ class DWARFContextInMemory : public DWARFContext { StringRef LineSection; StringRef StringSection; StringRef RangeSection; + StringRef PubNamesSection; // Sections for DWARF5 split dwarf proposal. RelocAddrMap InfoDWORelocMap; @@ -172,6 +174,7 @@ public: virtual StringRef getLineSection() { return LineSection; } virtual StringRef getStringSection() { return StringSection; } virtual StringRef getRangeSection() { return RangeSection; } + virtual StringRef getPubNamesSection() { return PubNamesSection; } // Sections for DWARF5 split dwarf proposal. 
virtual StringRef getInfoDWOSection() { return InfoDWOSection; } -- cgit v1.1 From 91231a6dc7c10e05934f2cf141a2d66b604a528f Mon Sep 17 00:00:00 2001 From: Sergei Larin Date: Tue, 12 Feb 2013 16:36:03 +0000 Subject: Equal treatment of labels and other terminators in MI DAG construction. MI sched DAG construction allows targets to include terminators into scheduling DAG. Extend this functionality to labels as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174977 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAGInstrs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 59e1ca1..71e7a21 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -746,7 +746,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } - assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && + assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); SUnit *SU = MISUnitMap[MI]; -- cgit v1.1 From c5ef7eee3c412b0f334e395b0cf7c363200c2f79 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 12 Feb 2013 18:00:14 +0000 Subject: Allow optionally generating pubnames section in DWARF info. Introduce option "generate-dwarf-pubnames" to control it, set to "false" by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 4 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 9 ++++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 69 ++++++++++++++++++++++++++++- lib/CodeGen/AsmPrinter/DwarfDebug.h | 3 ++ lib/MC/MCObjectFileInfo.cpp | 12 +++++ 5 files changed, 95 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index dad1054..93b00fb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1311,8 +1311,10 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) + if (!GV.isLocalToUnit()) { addFlag(VariableDIE, dwarf::DW_AT_external); + addGlobalName(GV.getName(), VariableDIE); + } // Add line number info. addSourceLine(VariableDIE, GV); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index c7662f9..77bf6a9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -64,6 +64,10 @@ class CompileUnit { /// descriptors to debug information entries using a DIEEntry proxy. DenseMap MDNodeToDIEEntryMap; + /// GlobalNames - A map of globally visible named entities for this unit. + /// + StringMap GlobalNames; + /// GlobalTypes - A map of globally visible types for this unit. 
/// StringMap GlobalTypes; @@ -99,6 +103,7 @@ public: unsigned getUniqueID() const { return UniqueID; } unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } + const StringMap &getGlobalNames() const { return GlobalNames; } const StringMap &getGlobalTypes() const { return GlobalTypes; } const StringMap > &getAccelNames() const { @@ -119,6 +124,10 @@ public: /// bool hasContent() const { return !CUDie->getChildren().empty(); } + /// addGlobalName - Add a new global entity to the compile unit. + /// + void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; } + /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6d3759d..0982dbb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -54,6 +54,10 @@ static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); +static cl::opt GenerateDwarfPubNamesSection("generate-dwarf-pubnames", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate DWARF pubnames section")); + namespace { enum DefaultOnOff { Default, Enable, Disable @@ -733,7 +737,9 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - return; + // Expose as global, if requested. + if (GenerateDwarfPubNamesSection) + TheCU->addGlobalName(SP.getName(), SubprogramDie); } // Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty. @@ -1028,6 +1034,10 @@ void DwarfDebug::endModule() { emitAccelTypes(); } + // Emit info into a debug pubnames section, if requested. + if (GenerateDwarfPubNamesSection) + emitDebugPubnames(); + // Emit info into a debug pubtypes section. // TODO: When we don't need the option anymore we can // remove all of the code that adds to the table. @@ -1784,6 +1794,8 @@ void DwarfDebug::emitSectionLabels() { DwarfLineSectionSym = emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); emitSectionSym(Asm, TLOF.getDwarfLocSection()); + if (GenerateDwarfPubNamesSection) + emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); @@ -2119,6 +2131,61 @@ void DwarfDebug::emitAccelTypes() { AT.Emit(Asm, SectionBegin, &InfoHolder); } +/// emitDebugPubnames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubnames() { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + + typedef DenseMap CUMapType; + for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + unsigned ID = TheCU->getUniqueID(); + + if (TheCU->getGlobalNames().empty()) + continue; + + // Start the dwarf pubnames section. 
+ Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); + + Asm->OutStreamer.AddComment("Length of Public Names Info"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), + Asm->GetTempSymbol("pubnames_begin", ID), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); + + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), + DwarfInfoSectionSym); + + Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID), + Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), + 4); + + const StringMap &Globals = TheCU->getGlobalNames(); + for (StringMap::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const DIE *Entity = GI->second; + + Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + } + + Asm->OutStreamer.AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID)); + } +} + void DwarfDebug::emitDebugPubTypes() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7d57a82..7b56815 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -500,6 +500,9 @@ private: /// \brief Emit type dies into a hashed accelerator table. void emitAccelTypes(); + /// \brief Emit visible names into a debug pubnames section. + void emitDebugPubnames(); + /// \brief Emit visible types into a debug pubtypes section. 
void emitDebugPubTypes(); diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index ae0abde..2e1a045 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -186,6 +186,10 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { Ctx->getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + DwarfPubNamesSection = + Ctx->getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, @@ -400,6 +404,9 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfFrameSection = Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); + DwarfPubNamesSection = + Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); @@ -543,6 +550,11 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); + DwarfPubNamesSection = + Ctx->getCOFFSection(".debug_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getCOFFSection(".debug_pubtypes", COFF::IMAGE_SCN_MEM_DISCARDABLE | -- cgit v1.1 From 8915e27704b2afd362a69c6be1111fb06bbcc727 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 12 Feb 2013 18:29:02 +0000 Subject: [ms-inline asm] Add support for lexing binary integers with a [bB] suffix. This is complicated by backward labels (e.g., 0b can be both a backward label and a binary zero). The current implementation assumes [0-9]b is always a label and thus it's possible for 0b and 1b to not be interpreted correctly for ms-style inline assembly. However, this is relatively simple to fix in the inline assembly (i.e., drop the [bB]). This patch also limits backward labels to [0-9]b, so that only 0b and 1b are ambiguous. Part of rdar://12470373 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174983 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 59 ++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 530e94e..8fcc264 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -179,26 +179,48 @@ static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { } } bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; - CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; + bool isBinary = LookAhead[-1] == 'b' || LookAhead[-1] == 'B'; + CurPtr = (isBinary || isHex || !FirstHex) ? LookAhead : FirstHex; if (isHex) return 16; + if (isBinary) { + --CurPtr; + return 2; + } return DefaultRadix; } /// LexDigit: First character is [0-9]. 
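/// Illustrative consequences of the rules listed below (examples assumed
/// from the grammar in this patch, not exhaustive):
///   "0b1010" lexes as binary 10 (prefix form)
///   "1010b"  lexes as binary 10 (suffix form, [01]+[bB])
///   "0b" and "1b" alone lex as backward label references, *not* binary
///   zero/one, so MS-style inline asm must write plain 0 or 1 instead.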
/// Local Label: [0-9][:] -/// Forward/Backward Label: [0-9][fb] -/// Binary integer: 0b[01]+ +/// Forward/Backward Label: [0-9]+f or [0-9]b +/// Binary integer: 0b[01]+ or [01][bB] /// Octal integer: 0[0-7]+ /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { + + // Backward Label: [0-9]b + if (*CurPtr == 'b') { + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[1])) { + long long Value; + StringRef Result(TokStart, CurPtr - TokStart); + if (Result.getAsInteger(10, Value)) + return ReturnError(TokStart, "invalid backward label"); + + return AsmToken(AsmToken::Integer, Result, Value); + } + } + + // Binary integer: 1[01]*[bB] // Decimal integer: [1-9][0-9]* + // Hexidecimal integer: [1-9][0-9a-fA-F]*[hH] if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doLookAhead(CurPtr, 10); - bool isHex = Radix == 16; + bool isDecimal = Radix == 10; + // Check for floating point literals. - if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { + if (isDecimal && (*CurPtr == '.' || *CurPtr == 'e')) { ++CurPtr; return LexFloatLiteral(); } @@ -211,7 +233,7 @@ AsmToken AsmLexer::LexDigit() { // integer, but that do fit in an unsigned one, we just convert them over. unsigned long long UValue; if (Result.getAsInteger(Radix, UValue)) - return ReturnError(TokStart, !isHex ? "invalid decimal number" : + return ReturnError(TokStart, isDecimal ? "invalid decimal number" : "invalid hexdecimal number"); Value = (long long)UValue; } @@ -227,15 +249,9 @@ AsmToken AsmLexer::LexDigit() { return AsmToken(AsmToken::Integer, Result, Value); } + // Binary integer: 0b[01]+ if (*CurPtr == 'b') { - ++CurPtr; - // See if we actually have "0b" as part of something like "jmp 0b\n" - if (!isdigit(CurPtr[0])) { - --CurPtr; - StringRef Result(TokStart, CurPtr - TokStart); - return AsmToken(AsmToken::Integer, Result, 0); - } - const char *NumStart = CurPtr; + const char *NumStart = ++CurPtr; while (CurPtr[0] == '0' || CurPtr[0] == '1') ++CurPtr; @@ -256,6 +272,7 @@ AsmToken AsmLexer::LexDigit() { return AsmToken(AsmToken::Integer, Result, Value); } + // Hex integer: 0x[0-9a-fA-F]+ if (*CurPtr == 'x') { ++CurPtr; const char *NumStart = CurPtr; @@ -282,17 +299,21 @@ AsmToken AsmLexer::LexDigit() { (int64_t)Result); } - // Either octal or hexidecimal. + // Binary: 0[01]*[Bb], but not 0b. + // Octal: 0[0-7]* + // Hexidecimal: [0][0-9a-fA-F]*[hH] long long Value; unsigned Radix = doLookAhead(CurPtr, 8); - bool isHex = Radix == 16; + bool isBinary = Radix == 2; + bool isOctal = Radix == 8; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, !isHex ? "invalid octal number" : + return ReturnError(TokStart, isOctal ? "invalid octal number" : + isBinary ? "invalid binary number" : "invalid hexdecimal number"); - // Consume the [hH]. - if (Radix == 16) + // Consume the [bB][hH]. + if (Radix == 2 || Radix == 16) ++CurPtr; // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL -- cgit v1.1 From 67cd669f7b046191646351ec7005bc1b26bb01c3 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 12 Feb 2013 18:38:36 +0000 Subject: Minor code simplification. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174985 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/GVN.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 50c4714..fae0a1f 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2445,7 +2445,7 @@ bool GVN::performPRE(Function &F) {
       if (P == CurrentBlock) {
         NumWithout = 2;
         break;
-      } else if (!DT->dominates(&F.getEntryBlock(), P)) {
+      } else if (!DT->isReachableFromEntry(P)) {
         NumWithout = 2;
         break;
       }
--
cgit v1.1


From c73b96a99f0192cd0a64f6d9e20b6e92e1003366 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Tue, 12 Feb 2013 18:44:43 +0000
Subject: Check that pointers are removed from maps before calling delete on
 the pointers, for tidiness' sake.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174988 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/GVN.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index fae0a1f..0fe1096 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2371,8 +2371,8 @@ bool GVN::processBlock(BasicBlock *BB) {
        E = InstrsToErase.end(); I != E; ++I) {
     DEBUG(dbgs() << "GVN removed: " << **I << '\n');
     if (MD) MD->removeInstruction(*I);
-    (*I)->eraseFromParent();
     DEBUG(verifyRemoved(*I));
+    (*I)->eraseFromParent();
   }
   InstrsToErase.clear();

@@ -2504,8 +2504,8 @@ bool GVN::performPRE(Function &F) {
       // the PRE predecessor.  This is typically because of loads which
       // are not value numbered precisely.
       if (!success) {
-        delete PREInstr;
         DEBUG(verifyRemoved(PREInstr));
+        delete PREInstr;
         continue;
       }

@@ -2551,8 +2551,8 @@ bool GVN::performPRE(Function &F) {

       DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
       if (MD) MD->removeInstruction(CurInst);
-      CurInst->eraseFromParent();
       DEBUG(verifyRemoved(CurInst));
+      CurInst->eraseFromParent();
       Changed = true;
     }
   }
--
cgit v1.1


From 8c0d29fee988928d2ce439b7a5f772e1cda62060 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Tue, 12 Feb 2013 19:05:10 +0000
Subject: When disabling PRE because a value is directly redundant with itself
 (through a loop), don't continue to iterate through the remaining
 predecessors.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174994 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/GVN.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0fe1096..ff55f6f 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2455,7 +2455,9 @@ bool GVN::performPRE(Function &F) {
         PREPred = P;
         ++NumWithout;
       } else if (predV == CurInst) {
+        /* CurInst dominates this predecessor. */
         NumWithout = 2;
+        break;
       } else {
         predMap[P] = predV;
         ++NumWith;
--
cgit v1.1


From ab9d251e8569416e37caf29485ad1dd89f148fac Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Tue, 12 Feb 2013 19:31:23 +0000
Subject: [ms-inline asm] Accept the emit directive as either _emit or __emit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174998 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4d6756e..a4b4a54 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1446,8 +1446,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return Error(IDLoc, "unknown directive"); } - // _emit - if (ParsingInlineAsm && IDVal == "_emit") + // _emit or __emit + if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit")) return ParseDirectiveEmit(IDLoc, Info); CheckForValidSection(); -- cgit v1.1 From e1d640312889a9e5ec4fc6cd3d6a1bc74289c8a9 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 12 Feb 2013 19:42:32 +0000 Subject: [ms-inline asm] Pass the length of the IDVal, so we can do a proper AsmRewrite. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index a4b4a54..c01ea33 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -444,7 +444,8 @@ private: bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" // "_emit" - bool ParseDirectiveEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info); + bool ParseDirectiveEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info, + size_t len); void initializeDirectiveKindMap(); }; @@ -1448,7 +1449,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // _emit or __emit if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit")) - return ParseDirectiveEmit(IDLoc, Info); + return ParseDirectiveEmit(IDLoc, Info, IDVal.size()); CheckForValidSection(); @@ -3985,7 +3986,7 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { return false; } -bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) { +bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t len) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); if (ParseExpression(Value)) @@ -3997,7 +3998,7 @@ bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) { if (!isUIntN(8, IntValue) && !isIntN(8, IntValue)) return Error(ExprLoc, "literal value out of range for directive"); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, 5)); + Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, len)); return false; } -- cgit v1.1 From 5f3c4a39109479e81238ce28e91e5dcc565f068c Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 12 Feb 2013 19:49:10 +0000 Subject: Record PRE predecessors with a SmallVector instead of a DenseMap, and avoid a second pred_iterator traversal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175001 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index ff55f6f..26b1134 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -2389,7 +2389,7 @@ bool GVN::processBlock(BasicBlock *BB) { /// control flow patterns and attempts to perform simple PRE at the join point. 
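// A note on the bookkeeping in this commit's hunks (a sketch with the
// template arguments spelled out, since they are elided in the listing):
// availability is now recorded once per predecessor, in pred-order, as
//
//   SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
//   predMap.push_back(std::make_pair(V, P));  // leader V available in P
//   predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
//                                             // P will receive the PRE copy
//
// so the PHI operands can later be filled straight from predMap (a null
// first element takes the freshly created PREInstr) instead of walking
// pred_begin()/pred_end() a second time and re-querying a map.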
bool GVN::performPRE(Function &F) { bool Changed = false; - DenseMap predMap; + SmallVector, 8> predMap; for (df_iterator DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { BasicBlock *CurrentBlock = *DI; @@ -2452,6 +2452,7 @@ bool GVN::performPRE(Function &F) { Value* predV = findLeader(P, ValNo); if (predV == 0) { + predMap.push_back(std::make_pair(static_cast(0), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { @@ -2459,7 +2460,7 @@ bool GVN::performPRE(Function &F) { NumWithout = 2; break; } else { - predMap[P] = predV; + predMap.push_back(std::make_pair(predV, P)); ++NumWith; } } @@ -2514,7 +2515,6 @@ bool GVN::performPRE(Function &F) { PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); PREInstr->setDebugLoc(CurInst->getDebugLoc()); - predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; @@ -2522,13 +2522,14 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. - pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); - PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), + PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = PB; PI != PE; ++PI) { - BasicBlock *P = *PI; - Phi->addIncoming(predMap[P], P); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); } VN.add(Phi, ValNo); -- cgit v1.1 From 87d0b9ed1462705dd9bf1cb7f67d0bf03af776c8 Mon Sep 17 00:00:00 2001 From: Guy Benyei Date: Tue, 12 Feb 2013 21:21:59 +0000 Subject: Add static cast to unsigned char whenever a character classification function is called with a signed char argument, in order to avoid assertions in Windows Debug configuration. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175006 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 74 +++++++++++++++++------------ lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- lib/CodeGen/TargetInstrInfo.cpp | 2 +- lib/IR/AsmWriter.cpp | 11 +++-- lib/IR/InlineAsm.cpp | 4 +- lib/IR/LLVMContext.cpp | 5 +- lib/Linker/LinkModules.cpp | 3 +- lib/MC/MCParser/AsmParser.cpp | 9 ++-- lib/MC/MCSectionMachO.cpp | 4 +- lib/Object/COFFObjectFile.cpp | 2 +- lib/Object/MachOObjectFile.cpp | 2 +- lib/Support/FileUtilities.cpp | 4 +- lib/Support/PathV2.cpp | 3 +- lib/Support/Windows/Path.inc | 2 +- lib/Support/raw_ostream.cpp | 3 +- 15 files changed, 77 insertions(+), 53 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 3b8b033..35108af 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -119,7 +119,9 @@ static void UnEscapeLexed(std::string &Str) { if (BIn < EndBuffer-1 && BIn[1] == '\\') { *BOut++ = '\\'; // Two \ becomes one BIn += 2; - } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { + } else if (BIn < EndBuffer-2 && + isxdigit(static_cast(BIn[1])) && + isxdigit(static_cast(BIn[2]))) { *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]); BIn += 3; // Skip over handled chars ++BOut; @@ -135,7 +137,8 @@ static void UnEscapeLexed(std::string &Str) { /// isLabelChar - Return true for [-a-zA-Z$._0-9]. static bool isLabelChar(char C) { - return isalnum(C) || C == '-' || C == '$' || C == '.' 
|| C == '_'; + return isalnum(static_cast(C)) || C == '-' || C == '$' || + C == '.' || C == '_'; } @@ -188,7 +191,7 @@ lltok::Kind LLLexer::LexToken() { switch (CurChar) { default: // Handle letters: [a-zA-Z_] - if (isalpha(CurChar) || CurChar == '_') + if (isalpha(static_cast(CurChar)) || CurChar == '_') return LexIdentifier(); return lltok::Error; @@ -282,8 +285,8 @@ lltok::Kind LLLexer::LexAt() { return lltok::GlobalVar; // Handle GlobalVarID: @[0-9]+ - if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + if (isdigit(static_cast(CurPtr[0]))) { + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; uint64_t Val = atoull(TokStart+1, CurPtr); @@ -317,10 +320,12 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) { /// ReadVarName - Read the rest of a token containing a variable name. bool LLLexer::ReadVarName() { const char *NameStart = CurPtr; - if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + if (isalpha(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { ++CurPtr; - while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + while (isalnum(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') ++CurPtr; @@ -346,8 +351,8 @@ lltok::Kind LLLexer::LexPercent() { return lltok::LocalVar; // Handle LocalVarID: %[0-9]+ - if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + if (isdigit(static_cast(CurPtr[0]))) { + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; uint64_t Val = atoull(TokStart+1, CurPtr); @@ -381,10 +386,12 @@ lltok::Kind LLLexer::LexQuote() { /// ! lltok::Kind LLLexer::LexExclaim() { // Lex a metadata name as a MetadataVar. - if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + if (isalpha(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { ++CurPtr; - while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + while (isalnum(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') ++CurPtr; @@ -399,8 +406,8 @@ lltok::Kind LLLexer::LexExclaim() { /// AttrGrpID ::= #[0-9]+ lltok::Kind LLLexer::LexHash() { // Handle AttrGrpID: #[0-9]+ - if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + if (isdigit(static_cast(CurPtr[0]))) { + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; uint64_t Val = atoull(TokStart+1, CurPtr); @@ -425,8 +432,11 @@ lltok::Kind LLLexer::LexIdentifier() { for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. - if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; - if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; + if (!IntEnd && !isdigit(static_cast(*CurPtr))) + IntEnd = CurPtr; + if (!KeywordEnd && !isalnum(static_cast(*CurPtr)) && + *CurPtr != '_') + KeywordEnd = CurPtr; } // If we stopped due to a colon, this really is a label. @@ -676,7 +686,8 @@ lltok::Kind LLLexer::LexIdentifier() { // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. 
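// For instance (illustrative values, assuming the width rule below):
// "u0x00FF" has four hex digits, so it is lexed as a 16-bit unsigned
// constant with value 255, while "s0xFF" has two digits and is lexed as
// an 8-bit constant interpreted as signed, i.e. -1. The digit count, not
// the magnitude, fixes the APInt width (4 bits per digit).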
if ((TokStart[0] == 'u' || TokStart[0] == 's') && - TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { + TokStart[1] == '0' && TokStart[2] == 'x' && + isxdigit(static_cast(TokStart[3]))) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; APInt Tmp(bits, StringRef(TokStart+3, len), 16); @@ -716,13 +727,13 @@ lltok::Kind LLLexer::Lex0x() { Kind = 'J'; } - if (!isxdigit(CurPtr[0])) { + if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. CurPtr = TokStart+1; return lltok::Error; } - while (isxdigit(CurPtr[0])) + while (isxdigit(static_cast(CurPtr[0]))) ++CurPtr; if (Kind == 'J') { @@ -769,7 +780,8 @@ lltok::Kind LLLexer::Lex0x() { /// HexPPC128Constant 0xM[0-9A-Fa-f]+ lltok::Kind LLLexer::LexDigitOrNegative() { // If the letter after the negative is not a number, this is probably a label. - if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { + if (!isdigit(static_cast(TokStart[0])) && + !isdigit(static_cast(CurPtr[0]))) { // Okay, this is not a number after the -, it's probably a label. if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); @@ -783,7 +795,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // At this point, it is either a label, int or fp constant. // Skip digits, we have at least one. - for (; isdigit(CurPtr[0]); ++CurPtr) + for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; // Check to see if this really is a label afterall, e.g. "-1:". @@ -820,13 +832,14 @@ lltok::Kind LLLexer::LexDigitOrNegative() { ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { - if (isdigit(CurPtr[1]) || - ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + if (isdigit(static_cast(CurPtr[1])) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && + isdigit(static_cast(CurPtr[2])))) { CurPtr += 2; - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; } } @@ -838,11 +851,11 @@ lltok::Kind LLLexer::LexDigitOrNegative() { lltok::Kind LLLexer::LexPositive() { // If the letter after the negative is a number, this is probably not a // label. - if (!isdigit(CurPtr[0])) + if (!isdigit(static_cast(CurPtr[0]))) return lltok::Error; // Skip digits. - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; // At this point, we need a '.'. @@ -854,13 +867,14 @@ lltok::Kind LLLexer::LexPositive() { ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { - if (isdigit(CurPtr[1]) || - ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + if (isdigit(static_cast(CurPtr[1])) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && + isdigit(static_cast(CurPtr[2])))) { CurPtr += 2; - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; } } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 86949a7..f5fc66c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2038,7 +2038,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, /// a matching constraint like "4". 
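/// For instance (an illustrative GCC-style example, not taken from this
/// file): in
///   asm("addl %2, %0" : "=r"(sum) : "0"(sum), "r"(inc));
/// the input constraint "0" ties that input to output operand 0, so its
/// ConstraintCode begins with a digit and this predicate returns true,
/// whereas a plain "r" constraint does not.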
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); - return isdigit(ConstraintCode[0]); + return isdigit(static_cast(ConstraintCode[0])); } /// getMatchedOperand - If this is an input matching constraint, this method diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index d5fbf14..20eb918 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -80,7 +80,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) atInsnStart = true; - if (atInsnStart && !std::isspace(*Str)) { + if (atInsnStart && !std::isspace(static_cast(*Str))) { Length += MAI.getMaxInstLength(); atInsnStart = false; } diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index bf893e8..17d49ac 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -117,7 +117,7 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { } // Scan the name to see if it needs quotes first. - bool NeedsQuotes = isdigit(Name[0]); + bool NeedsQuotes = isdigit(static_cast(Name[0])); if (!NeedsQuotes) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { // By making this unsigned, the value passed in to isalnum will always be @@ -125,7 +125,8 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { // its implementation will assert. This situation can arise when dealing // with UTF-8 multibyte characters. unsigned char C = Name[i]; - if (!isalnum(C) && C != '-' && C != '.' && C != '_') { + if (!isalnum(static_cast(C)) && C != '-' && C != '.' && + C != '_') { NeedsQuotes = true; break; } @@ -1392,14 +1393,16 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { if (Name.empty()) { Out << " "; } else { - if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' || + if (isalpha(static_cast(Name[0])) || + Name[0] == '-' || Name[0] == '$' || Name[0] == '.' || Name[0] == '_') Out << Name[0]; else Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F); for (unsigned i = 1, e = Name.size(); i != e; ++i) { unsigned char C = Name[i]; - if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_') + if (isalnum(static_cast(C)) || C == '-' || C == '$' || + C == '.' || C == '_') Out << C; else Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 10d281b..9f2a9fe 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -151,10 +151,10 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, if (ConstraintEnd == E) return true; // "{foo" pCodes->push_back(std::string(I, ConstraintEnd+1)); I = ConstraintEnd+1; - } else if (isdigit(*I)) { // Matching Constraint + } else if (isdigit(static_cast(*I))) { // Matching Constraint // Maximal munch numbers. 
StringRef::iterator NumStart = I; - while (I != E && isdigit(*I)) + while (I != E && isdigit(static_cast(*I))) ++I; pCodes->push_back(std::string(NumStart, I)); unsigned N = atoi(pCodes->back().c_str()); diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 8e2bbb7..b73cd03 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -130,12 +130,13 @@ static bool isValidName(StringRef MDName) { if (MDName.empty()) return false; - if (!std::isalpha(MDName[0])) + if (!std::isalpha(static_cast(MDName[0]))) return false; for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E; ++I) { - if (!std::isalnum(*I) && *I != '_' && *I != '-' && *I != '.') + if (!std::isalnum(static_cast(*I)) && *I != '_' && + *I != '-' && *I != '.') return false; } return true; diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 3b8928a..c358a0a 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -606,7 +606,8 @@ void ModuleLinker::computeTypeMapping() { // Check to see if there is a dot in the name followed by a digit. size_t DotPos = ST->getName().rfind('.'); if (DotPos == 0 || DotPos == StringRef::npos || - ST->getName().back() == '.' || !isdigit(ST->getName()[DotPos+1])) + ST->getName().back() == '.' || + !isdigit(static_cast(ST->getName()[DotPos+1]))) continue; // Check to see if the destination module has a struct with the prefix name. diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index c01ea33..bd2c65e 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1622,7 +1622,8 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { // we can't do that. AsmLexer.cpp should probably be changed to handle // '@' as a special case when needed. static bool isIdentifierChar(char c) { - return isalnum(c) || c == '_' || c == '$' || c == '.'; + return isalnum(static_cast(c)) || c == '_' || c == '$' || + c == '.'; } bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, @@ -1646,7 +1647,8 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, continue; char Next = Body[Pos + 1]; - if (Next == '$' || Next == 'n' || isdigit(Next)) + if (Next == '$' || Next == 'n' || + isdigit(static_cast(Next))) break; } else { // This macro has parameters, look for \foo, \bar, etc. @@ -3094,7 +3096,8 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, if (Body[Pos] != '$' || Pos + 1 == End) continue; char Next = Body[Pos + 1]; - if (Next == '$' || Next == 'n' || isdigit(Next)) + if (Next == '$' || Next == 'n' || + isdigit(static_cast(Next))) break; } diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index e771556..fc32315 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -165,9 +165,9 @@ bool MCSectionMachO::isVirtualSection() const { /// StripSpaces - This removes leading and trailing spaces from the StringRef. 
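// Why the casts added throughout this commit matter (a minimal sketch of
// the failure mode named in the commit message): plain char may be signed,
// and passing a negative value other than EOF to the <ctype.h> classifiers
// is undefined behavior; the Windows debug CRT asserts on it. For example:
//
//   char C = '\xE9';                          // value -23 if char is signed
//   isspace(C);                               // UB; asserts on MSVC debug
//   isspace(static_cast<unsigned char>(C));   // OK; argument is 233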
static void StripSpaces(StringRef &Str) { - while (!Str.empty() && isspace(Str[0])) + while (!Str.empty() && isspace(static_cast(Str[0]))) Str = Str.substr(1); - while (!Str.empty() && isspace(Str.back())) + while (!Str.empty() && isspace(static_cast(Str.back()))) Str = Str.substr(0, Str.size()-1); } diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 0b7ee34..ca90e0e 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -267,7 +267,7 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, } if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL) - ret = ::toupper(ret); + ret = ::toupper(static_cast(ret)); Result = ret; return object_error::success; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index a853618..eb1690e 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -273,7 +273,7 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, } if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) - Char = toupper(Char); + Char = toupper(static_cast(Char)); Result = Char; return object_error::success; } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index fc8a2f3..4d7b239 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -87,9 +87,9 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, // If one of the positions is at a space and the other isn't, chomp up 'til // the end of the space. - while (isspace(*F1P) && F1P != F1End) + while (isspace(static_cast(*F1P)) && F1P != F1End) ++F1P; - while (isspace(*F2P) && F2P != F2End) + while (isspace(static_cast(*F2P)) && F2P != F2End) ++F2P; // If we stop on numbers, compare their difference. diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index 98d7382..41add96 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -44,7 +44,8 @@ namespace { #ifdef LLVM_ON_WIN32 // C: - if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':') + if (path.size() >= 2 && std::isalpha(static_cast(path[0])) && + path[1] == ':') return path.substr(0, 2); #endif diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 98d8a18..f4898e6 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -82,7 +82,7 @@ Path::isValid() const { pos = path.rfind(':',len); size_t rootslash = 0; if (pos != std::string::npos) { - if (pos != 1 || !isalpha(path[0]) || len < 3) + if (pos != 1 || !isalpha(static_cast(path[0])) || len < 3) return false; rootslash = 2; } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 106864d..f71abd3 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -241,7 +241,8 @@ raw_ostream &raw_ostream::operator<<(double N) { if (cs == '+' || cs == '-') { int c1 = buf[len - 2]; int c0 = buf[len - 1]; - if (isdigit(c1) && isdigit(c0)) { + if (isdigit(static_cast(c1)) && + isdigit(static_cast(c0))) { // Trim leading '0': "...e+012" -> "...e+12\0" buf[len - 3] = c1; buf[len - 2] = c0; -- cgit v1.1 From dc08bfbd565ba6540be698bba551b2039661299d Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 12 Feb 2013 21:29:39 +0000 Subject: This patch just fixes up various llvm formatting violations such as tabs, blanks at eol and long lines. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175007 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 3 ++- lib/MC/MCStreamer.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index d65f227..fb14dc9 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -135,7 +135,8 @@ class ELFObjectWriter : public MCObjectWriter { const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup, IsPCRel); + return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup, + IsPCRel); } bool is64Bit() const { return TargetObjectWriter->is64Bit(); } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index e92569b..7d79d62 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -42,7 +42,7 @@ void MCStreamer::reset() { LastSymbol = 0; const MCSection *section = NULL; SectionStack.clear(); - SectionStack.push_back(std::make_pair(section, section)); + SectionStack.push_back(std::make_pair(section, section)); } const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context, @@ -104,7 +104,7 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned Padding, - unsigned AddrSpace) { + unsigned AddrSpace) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); encodeULEB128(Value, OSE, Padding); -- cgit v1.1 From 469b144f1ce7dc3984cda265bc23ed0b41c20584 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 12 Feb 2013 21:33:51 +0000 Subject: [ms-inline-asm] Implement align directive (which is roughly equivalent to .align). Also, allow _EMIT and __EMIT for the emit directive. We already do the same for TYPE, SIZE, and LENGTH. 
rdar://13200215 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 52 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index bd2c65e..7aa3812 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -443,9 +443,12 @@ private: bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" - // "_emit" - bool ParseDirectiveEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info, - size_t len); + // "_emit" or "__emit" + bool ParseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info, + size_t Len); + + // "align" + bool ParseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info); void initializeDirectiveKindMap(); }; @@ -1447,9 +1450,14 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return Error(IDLoc, "unknown directive"); } - // _emit or __emit - if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit")) - return ParseDirectiveEmit(IDLoc, Info, IDVal.size()); + // __asm _emit or __asm __emit + if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit" || + IDVal == "_EMIT" || IDVal == "__EMIT")) + return ParseDirectiveMSEmit(IDLoc, Info, IDVal.size()); + + // __asm align + if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN")) + return ParseDirectiveMSAlign(IDLoc, Info); CheckForValidSection(); @@ -3989,7 +3997,7 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { return false; } -bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t len) { +bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t Len) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); if (ParseExpression(Value)) @@ -4001,7 +4009,23 @@ bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t if (!isUIntN(8, IntValue) && !isIntN(8, IntValue)) return Error(ExprLoc, "literal value out of range for directive"); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, len)); + Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len)); + return false; +} + +bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { + const MCExpr *Value; + SMLoc ExprLoc = getLexer().getLoc(); + if (ParseExpression(Value)) + return true; + const MCConstantExpr *MCE = dyn_cast(Value); + if (!MCE) + return Error(ExprLoc, "unexpected expression in align"); + uint64_t IntValue = MCE->getValue(); + if (!isPowerOf2_64(IntValue)) + return Error(ExprLoc, "literal value not a power of two greater then zero"); + + Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue))); return false; } @@ -4133,6 +4157,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { const char *Loc = (*I).Loc.getPointer(); + unsigned AdditionalSkip = 0; AsmRewriteKind Kind = (*I).Kind; // Emit everything up to the immediate/expression. If the previous rewrite @@ -4180,6 +4205,15 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, case AOK_Emit: OS << ".byte"; break; + case AOK_Align: { + unsigned Val = (*I).Val; + OS << ".align " << Val; + + // Skip the original immediate. + assert (Val < 10 && "Expected alignment less then 2^10."); + AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 
3 : 4; + break; + } case AOK_DotOperator: OS << (*I).Val; break; @@ -4187,7 +4221,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Skip the original expression. if (Kind != AOK_SizeDirective) - Start = Loc + (*I).Len; + Start = Loc + (*I).Len + AdditionalSkip; } // Emit the remainder of the asm string. -- cgit v1.1 From 8a8de9889d7030292d0256d91ff93e9230c025c1 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 12 Feb 2013 22:26:41 +0000 Subject: Actually delete this code, since it's really not clear what it's trying to do. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 26b1134..c04b447 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1526,10 +1526,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool isSinglePred = false; bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { - isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; @@ -1548,28 +1546,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { assert(TmpBB); LoadBB = TmpBB; - // FIXME: It is extremely unclear what this loop is doing, other than - // artificially restricting loadpre. - if (isSinglePred) { - bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - const AvailableValueInBlock &AV = ValuesPerBlock[i]; - if (AV.isSimpleValue()) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (Instruction *I = dyn_cast(AV.getSimpleValue())) - if (DT->dominates(LI, I)) { - isHot = true; - break; - } - } - - // We are interested only in "hot" instructions. We don't want to do any - // mis-optimizations here. - if (!isHot) - return false; - } - // Check to see how many predecessors have the loaded value fully // available. DenseMap PredLoads; -- cgit v1.1 From 49b5bb4cb4ec8c93f51cdaad925c9073788b94c8 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 12 Feb 2013 23:35:08 +0000 Subject: Changed isStoredObjCPointer => IsStoredObjCPointer. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index ff38c9d..ae3c628 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -72,7 +72,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A, /// Test if the value of P, or any value covered by its provenance, is ever /// stored within the function (not counting callees). -static bool isStoredObjCPointer(const Value *P) { +static bool IsStoredObjCPointer(const Value *P) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(P); @@ -132,18 +132,18 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, if (AIsIdentified) { // Check for an obvious escape. if (isa(B)) - return isStoredObjCPointer(A); + return IsStoredObjCPointer(A); if (BIsIdentified) { // Check for an obvious escape. 
if (isa(A)) - return isStoredObjCPointer(B); + return IsStoredObjCPointer(B); // Both pointers are identified and escapes aren't an evident problem. return false; } } else if (BIsIdentified) { // Check for an obvious escape. if (isa(A)) - return isStoredObjCPointer(B); + return IsStoredObjCPointer(B); } // Special handling for PHI and Select. -- cgit v1.1 From 66bff1eec9f45733ff12b8d2307326a389270874 Mon Sep 17 00:00:00 2001 From: "Arnaud A. de Grandmaison" Date: Wed, 13 Feb 2013 00:19:19 +0000 Subject: Fix comment visitSExt is an adapted copy of the related visitZExt method, so adapt the comment accordingly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175019 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index fbc259b..a960ab2 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1041,8 +1041,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { } Instruction *InstCombiner::visitSExt(SExtInst &CI) { - // If this sign extend is only used by a truncate, let the truncate by - // eliminated before we try to optimize this zext. + // If this sign extend is only used by a truncate, let the truncate be + // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa(CI.use_back())) return 0; -- cgit v1.1 From e68542e67e5c0f8d4bbdae0dde6ccd24525a18e3 Mon Sep 17 00:00:00 2001 From: David Peixotto Date: Wed, 13 Feb 2013 00:36:35 +0000 Subject: Test commit. Fixed typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175020 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9d7a379..bd53334a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2576,7 +2576,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } // The remaining GPRs hold either the beginning of variable-argument -// data, or the beginning of an aggregate passed by value (usuall +// data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with -- cgit v1.1 From b1953981926991bb1373c915952368c653138e71 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 13 Feb 2013 01:03:13 +0000 Subject: [ms-inline-asm] Make sure the AsmRewrite list is sorted in lexical order. 
rdar://13202662

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175021 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCParser/AsmParser.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'lib')

diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 7aa3812..0644ea3 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -4029,6 +4029,10 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
   return false;
 }

+bool AsmStringSort (AsmRewrite A, AsmRewrite B) {
+  return A.Loc.getPointer() < B.Loc.getPointer();
+}
+
 bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
                                  unsigned &NumOutputs, unsigned &NumInputs,
                                  SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
@@ -4153,6 +4157,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
   AsmRewriteKind PrevKind = AOK_Imm;
   raw_string_ostream OS(AsmStringIR);
   const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+  std::sort (AsmStrRewrites.begin(), AsmStrRewrites.end(), AsmStringSort);
   for (SmallVectorImpl<AsmRewrite>::iterator
          I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
     const char *Loc = (*I).Loc.getPointer();
--
cgit v1.1


From 4be3853fd0a0e3b37a27afe05327e638e680c463 Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Wed, 13 Feb 2013 01:14:49 +0000
Subject: Debug Info: LiveDebugVariables can remove DBG_VALUEs, make sure we
 emit them back.

RegisterCoalescer used to depend on LiveDebugVariables. LDV removes DBG_VALUEs
without emitting them at the end.

We fix this by removing LDV from RegisterCoalescer. Also add an assertion to
make sure we call emitDebugValues if DBG_VALUEs are removed at
runOnMachineFunction.

rdar://problem/13183203
Reviewed by Andy & Jakob

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175023 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/LiveDebugVariables.cpp | 16 ++++++++++++----
 lib/CodeGen/LiveDebugVariables.h   |  4 ++++
 lib/CodeGen/RegisterCoalescer.cpp  | 10 ----------
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 786f353..3e31051 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -64,7 +64,8 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }

-LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0),
+  EmitDone(false), ModifiedMF(false) {
   initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
 }

@@ -701,12 +702,17 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
     return false;
   if (!pImpl)
     pImpl = new LDVImpl(this);
-  return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+  ModifiedMF = static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+  return ModifiedMF;
 }

 void LiveDebugVariables::releaseMemory() {
-  if (pImpl)
+  if (pImpl) {
     static_cast<LDVImpl*>(pImpl)->clear();
+    // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) && + "Dbg values are not emitted in LDV"); + } } LiveDebugVariables::~LiveDebugVariables() { @@ -1014,8 +1020,10 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { } void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { - if (pImpl) + if (pImpl) { static_cast(pImpl)->emitDebugValues(VRM); + EmitDone = true; + } } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 3ce3c39..bb4c160 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -31,6 +31,10 @@ class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; + /// Whether emitDebugValues is called. + bool EmitDone; + /// Whether the machine function is modified during the pass. + bool ModifiedMF; public: static char ID; // Pass identification, replacement for typeid diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 36d8101..e682d63 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" -#include "LiveDebugVariables.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -84,7 +83,6 @@ namespace { const TargetRegisterInfo* TRI; const TargetInstrInfo* TII; LiveIntervals *LIS; - LiveDebugVariables *LDV; const MachineLoopInfo* Loops; AliasAnalysis *AA; RegisterClassInfo RegClassInfo; @@ -208,7 +206,6 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -394,8 +391,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); @@ -883,9 +878,6 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); - // Update LiveDebugVariables. - LDV->renameRegister(SrcReg, DstReg, SubIdx); - SmallPtrSet Visited; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { @@ -2136,7 +2128,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); LIS = &getAnalysis(); - LDV = &getAnalysis(); AA = &getAnalysis(); Loops = &getAnalysis(); @@ -2182,7 +2173,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } DEBUG(dump()); - DEBUG(LDV->dump()); if (VerifyCoalescing) MF->verify(this, "After register coalescing"); return true; -- cgit v1.1 From b3cecdfcf9a0b736d3c0f6f5abffb41ecda3577c Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 13 Feb 2013 02:29:18 +0000 Subject: Fix comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveDebugVariables.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 3e31051..ecc9978 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -260,7 +260,7 @@ public: /// provided virtual register map. void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); - /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); @@ -341,7 +341,7 @@ public: /// splitRegister - Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef NewRegs); - /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM); void print(raw_ostream&); -- cgit v1.1 From 39f02940ba085af79011f7e9095bf9902cd0fa6f Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 13 Feb 2013 05:14:12 +0000 Subject: [asan] fix confusing indentation git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175033 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4950d83..b97e342 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -219,7 +219,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize, if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) { assert(LongSize == 64); Mapping.Offset = kDefaultShort64bitShadowOffset; - } if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) { + } + if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) { // Zero offset log is the special case. Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog; } -- cgit v1.1 From 7bce462c15356229e13d78d14560feaac30a0f5f Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 13 Feb 2013 05:59:45 +0000 Subject: [tsan] disable load widening in ThreadSanitizer mode git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/MemoryDependenceAnalysis.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 5cb0016..9a1edc7 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -283,6 +283,12 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, const DataLayout &TD) { // We can only extend simple integer loads. if (!isa(LI->getType()) || !LI->isSimple()) return 0; + + // Load widening is hostile to ThreadSanitizer: it may cause false positives + // or make the reports more cryptic (access sizes are wrong). + if (LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::ThreadSafety)) + return 0; // Get the base of this load. 
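// An illustrative false-positive scenario (assumed for exposition, not
// taken from the patch itself):
//
//   struct Flags { char a; char b; };  // adjacent, independently owned
//   Flags F;
//   // Thread 1 loads F.a twice; load widening may fuse the two 8-bit
//   // loads into one 16-bit load that also covers F.b.
//   // Thread 2 concurrently stores only to F.b.
//
// The widened 16-bit read overlaps thread 2's store, so ThreadSanitizer
// reports a race (with a misleading 2-byte access size) even though the
// original program never reads F.b from thread 1.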
int64_t LIOffs = 0; -- cgit v1.1 From 23571f4f2c895d60c9ed23b831f988b49a55478e Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 13 Feb 2013 06:01:05 +0000 Subject: Check i1 as well as i8 variables for 8 bit registers for x86 inline assembly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175036 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c24d41b..bc29df49 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18135,7 +18135,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // really want an 8-bit or 32-bit register, map to the appropriate register // class and return the appropriate register. if (Res.second == &X86::GR16RegClass) { - if (VT == MVT::i8) { + if (VT == MVT::i8 || VT == MVT::i1) { unsigned DestReg = 0; switch (Res.first) { default: break; -- cgit v1.1 From d29804f80d1cc26ea552b58693ce883f5b13de7a Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 13 Feb 2013 08:02:04 +0000 Subject: Prevent insertion of "vzeroupper" before call that preserves YMM registers, since a caller uses preserved registers across the call. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86VZeroUpper.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index c4a5887..0f77948 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { return false; } +static bool clobbersAllYmmRegs(const MachineOperand &MO) { + for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) { + if (!MO.clobbersPhysReg(reg)) + return false; + } + return true; +} + static bool hasYmmReg(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO)) + return true; if (!MO.isReg()) continue; if (MO.isDebug()) -- cgit v1.1 From 80806961035807d1b9566cdd34233c1b4cf49282 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Wed, 13 Feb 2013 08:32:14 +0000 Subject: Make jumptables work for -static git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 49048db..c3e61bb 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1607,6 +1607,8 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), def : Mips16Pat<(MipsHi tglobaladdr:$in), (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>; +def : Mips16Pat<(MipsHi tjumptable:$in), + (SllX16 (LiRxImmX16 tjumptable:$in), 16)>; def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>; -- cgit v1.1 From 0e9d5d059c4aa959e9ef4dff011dbd38d45a1016 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 13 Feb 2013 08:42:21 +0000 Subject: Add some accessor and query methods for retrieving Attribute objects and such. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175046 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AttributeImpl.h | 4 ++++ lib/IR/Attributes.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'lib') diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 7bb1fcc..cb2c55c 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -170,8 +170,12 @@ public: static AttributeSetNode *get(LLVMContext &C, ArrayRef Attrs); bool hasAttribute(Attribute::AttrKind Kind) const; + bool hasAttribute(StringRef Kind) const; bool hasAttributes() const { return !AttrList.empty(); } + Attribute getAttribute(Attribute::AttrKind Kind) const; + Attribute getAttribute(StringRef Kind) const; + unsigned getAlignment() const; unsigned getStackAlignment() const; std::string getAsString(bool InAttrGrp) const; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 8249be4..7d0bec2 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -441,6 +441,30 @@ bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { return false; } +bool AttributeSetNode::hasAttribute(StringRef Kind) const { + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) + if (I->hasAttribute(Kind)) + return true; + return false; +} + +Attribute AttributeSetNode::getAttribute(Attribute::AttrKind Kind) const { + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) + if (I->hasAttribute(Kind)) + return *I; + return Attribute(); +} + +Attribute AttributeSetNode::getAttribute(StringRef Kind) const { + for (SmallVectorImpl::const_iterator I = AttrList.begin(), + E = AttrList.end(); I != E; ++I) + if (I->hasAttribute(Kind)) + return *I; + return Attribute(); +} + unsigned AttributeSetNode::getAlignment() const { for (SmallVectorImpl::const_iterator I = AttrList.begin(), E = AttrList.end(); I != E; ++I) @@ -760,6 +784,11 @@ bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{ return ASN ? ASN->hasAttribute(Kind) : false; } +bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const { + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->hasAttribute(Kind) : false; +} + bool AttributeSet::hasAttributes(unsigned Index) const { AttributeSetNode *ASN = getAttributes(Index); return ASN ? ASN->hasAttributes() : false; @@ -779,6 +808,18 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { return false; } +Attribute AttributeSet::getAttribute(unsigned Index, + Attribute::AttrKind Kind) const { + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->getAttribute(Kind) : Attribute(); +} + +Attribute AttributeSet::getAttribute(unsigned Index, + StringRef Kind) const { + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->getAttribute(Kind) : Attribute(); +} + unsigned AttributeSet::getParamAlignment(unsigned Index) const { AttributeSetNode *ASN = getAttributes(Index); return ASN ? ASN->getAlignment() : 0; -- cgit v1.1 From f107e6ca9bbfc82c980c3e8e5c6bf04261b49c90 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 13 Feb 2013 09:26:26 +0000 Subject: Use array_pod_sort. 
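For reference, a self-contained sketch of the array_pod_sort comparator contract that rewrites like this rely on: the one-argument form used in this commit needs operator< on the element type, and the explicit form takes a qsort-style three-way compare over PODs.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static int ThreeWay(const void *A, const void *B) {
  int L = *static_cast<const int *>(A), R = *static_cast<const int *>(B);
  return L < R ? -1 : (R < L ? 1 : 0); // total, deterministic order
}

void sortValues(llvm::SmallVectorImpl<int> &V) {
  llvm::array_pod_sort(V.begin(), V.end(), ThreeWay);
}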
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175048 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 7d0bec2..3de304e 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -412,7 +412,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, FoldingSetNodeID ID; SmallVector SortedAttrs(Attrs.begin(), Attrs.end()); - std::sort(SortedAttrs.begin(), SortedAttrs.end()); + array_pod_sort(SortedAttrs.begin(), SortedAttrs.end()); for (SmallVectorImpl::iterator I = SortedAttrs.begin(), E = SortedAttrs.end(); I != E; ++I) -- cgit v1.1 From f09e02f01a817f4daf95ef8d3f1f2545297d32e7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 13 Feb 2013 13:40:35 +0000 Subject: X86: Disable generation of rep;movsl when %esi is used as a base pointer. This happens when there is both stack realignment and a dynamic alloca in the function. If we overwrite %esi (rep;movsl uses fixed registers) we'll lose the base pointer and the next register spill will write into oblivion. Fixes PR15249 and unbreaks firefox on i386/freebsd. Mozilla uses dynamic allocas and freebsd a 4 byte stack alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175057 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SelectionDAGInfo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 757e8c7..f934fdd 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -202,6 +202,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); + // ESI might be used as a base pointer, in that case we can't simply overwrite + // the register. Fall back to generic code. + const X86RegisterInfo *TRI = + static_cast(DAG.getTarget().getRegisterInfo()); + if (TRI->hasBasePointer(DAG.getMachineFunction()) && + TRI->getBaseRegister() == X86::ESI) + return SDValue(); + MVT AVT; if (Align & 1) AVT = MVT::i8; -- cgit v1.1 From 96848dfc465c8c7f156a562c246803ebefcf21cf Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 13 Feb 2013 17:40:07 +0000 Subject: Add registration for PPC-specific passes to allow the IR to be dumped via -print-after-all. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175058 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCBranchSelector.cpp | 11 ++++++++++- lib/Target/PowerPC/PPCCTRLoops.cpp | 14 +++++++++++++- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 19 ++++++++++++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 9911575..bd1c378 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -28,10 +28,16 @@ using namespace llvm; STATISTIC(NumExpanded, "Number of branches expanded to long format"); +namespace llvm { + void initializePPCBSelPass(PassRegistry&); +} + namespace { struct PPCBSel : public MachineFunctionPass { static char ID; - PPCBSel() : MachineFunctionPass(ID) {} + PPCBSel() : MachineFunctionPass(ID) { + initializePPCBSelPass(*PassRegistry::getPassRegistry()); + } /// BlockSizes - The sizes of the basic blocks in the function. 
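// An aside on the rep;movsl fix above: distilled, the bail-out condition is
// just this predicate (helper name illustrative; the API calls are the ones
// the patch itself uses):
static bool repMovsWouldClobberBasePtr(const MachineFunction &MF,
                                       const X86RegisterInfo *TRI) {
  // rep;movs hard-codes ESI/EDI/ECX, and ESI doubles as LLVM's x86 base
  // pointer when stack realignment meets a dynamic alloca.
  return TRI->hasBasePointer(MF) && TRI->getBaseRegister() == X86::ESI;
}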
std::vector BlockSizes; @@ -45,6 +51,9 @@ namespace { char PPCBSel::ID = 0; } +INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector", + false, false) + /// createPPCBranchSelectionPass - returns an instance of the Branch Selection /// Pass /// diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index a74932c..b98cc48 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -54,6 +54,10 @@ using namespace llvm; STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); +namespace llvm { + void initializePPCCTRLoopsPass(PassRegistry&); +} + namespace { class CountValue; struct PPCCTRLoops : public MachineFunctionPass { @@ -64,7 +68,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - PPCCTRLoops() : MachineFunctionPass(ID) {} + PPCCTRLoops() : MachineFunctionPass(ID) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -174,6 +180,12 @@ namespace { }; } // end anonymous namespace +INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 5856a95..0f943e8 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -34,6 +34,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +namespace llvm { + void initializePPCDAGToDAGISelPass(PassRegistry&); +} + namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine @@ -48,7 +52,9 @@ namespace { explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) {} + PPCSubTarget(*TM.getSubtargetImpl()) { + initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary @@ -1330,3 +1336,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { return new PPCDAGToDAGISel(TM); } +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, + false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce); +} + -- cgit v1.1 From 5d0ce79e26f40141f35cc0002dc5cc6060382359 Mon Sep 17 00:00:00 2001 From: Pekka Jaaskelainen Date: Wed, 13 Feb 2013 18:08:57 +0000 Subject: Metadata for annotating loops as parallel. The first consumer for this metadata is the loop vectorizer. See the documentation update for more info. 
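These PPC changes all follow one registration boilerplate; a minimal sketch for a hypothetical MachineFunctionPass (names illustrative, usual pass headers assumed):

namespace llvm { void initializeMyMFPassPass(PassRegistry&); }

namespace {
struct MyMFPass : public MachineFunctionPass {
  static char ID;
  MyMFPass() : MachineFunctionPass(ID) {
    // Registering with the PassRegistry is what lets -print-after-all
    // (and -debug-pass) see the pass.
    initializeMyMFPassPass(*PassRegistry::getPassRegistry());
  }
  virtual bool runOnMachineFunction(MachineFunction &MF) { return false; }
};
char MyMFPass::ID = 0;
}

INITIALIZE_PASS(MyMFPass, "my-mf-pass", "My MachineFunction Pass",
                false, false)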
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175060 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/LoopInfo.cpp | 50 ++++++++++++++++++++++++++++++ lib/Transforms/Vectorize/LoopVectorize.cpp | 8 +++++ 2 files changed, 58 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 4d4c627..f1ad650 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -233,6 +234,55 @@ bool Loop::isSafeToClone() const { return true; } +bool Loop::isAnnotatedParallel() const { + + BasicBlock *latch = getLoopLatch(); + if (latch == NULL) + return false; + + MDNode *desiredLoopIdMetadata = + latch->getTerminator()->getMetadata("llvm.loop.parallel"); + + if (!desiredLoopIdMetadata) + return false; + + // The loop branch contains the parallel loop metadata. In order to ensure + // that any parallel-loop-unaware optimization pass hasn't added loop-carried + // dependencies (thus converted the loop back to a sequential loop), check + // that all the memory instructions in the loop contain parallelism metadata + // that point to the same unique "loop id metadata" the loop branch does. + for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) { + for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end(); + II != EE; II++) { + + if (!II->mayReadOrWriteMemory()) + continue; + + if (!II->getMetadata("llvm.mem.parallel_loop_access")) + return false; + + // The memory instruction can refer to the loop identifier metadata + // directly or indirectly through another list metadata (in case of + // nested parallel loops). The loop identifier metadata refers to + // itself so we can check both cases with the same routine. + MDNode *loopIdMD = + dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access")); + bool loopIdMDFound = false; + for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { + if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { + loopIdMDFound = true; + break; + } + } + + if (!loopIdMDFound) + return false; + } + } + return true; +} + + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9fb451b..842ae02 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2276,6 +2276,14 @@ void LoopVectorizationLegality::collectLoopUniforms() { } bool LoopVectorizationLegality::canVectorizeMemory() { + + if (TheLoop->isAnnotatedParallel()) { + DEBUG(dbgs() + << "LV: A loop annotated parallel, ignore memory dependency " + << "checks.\n"); + return true; + } + typedef SmallVector ValueVector; typedef SmallPtrSet ValueSet; // Holds the Load and Store *instructions*. -- cgit v1.1 From abde6755f93cbfa7ad06c71f7e535b0b36f6f532 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 13 Feb 2013 18:38:58 +0000 Subject: [ms-inline-asm] Use an array_pod_sort, rather than a std::sort. 
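Returning to the parallel-loop annotation above: the per-access membership test at the heart of isAnnotatedParallel can be read in isolation as the following restatement (not new API, just the inner loop factored out):

static bool accessNamesLoopID(const Instruction *I, const MDNode *LoopID) {
  MDNode *MD = I->getMetadata("llvm.mem.parallel_loop_access");
  if (!MD)
    return false; // no annotation: conservatively treat as loop-carried
  // The access may name the loop id directly, or via a list of ids when
  // parallel loops nest; the self-referential loop id lets one scan handle
  // both cases.
  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
    if (MD->getOperand(i) == LoopID)
      return true;
  return false;
}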
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175063 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 0644ea3..2cce8b0 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" @@ -4029,8 +4030,14 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { return false; } -bool AsmStringSort (AsmRewrite A, AsmRewrite B) { - return A.Loc.getPointer() < B.Loc.getPointer(); +static int RewritesSort (const void *A, const void *B) { + const AsmRewrite *AsmRewriteA = static_cast(A); + const AsmRewrite *AsmRewriteB = static_cast(B); + if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer()) + return -1; + if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) + return 1; + return 0; } bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, @@ -4157,7 +4164,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, AsmRewriteKind PrevKind = AOK_Imm; raw_string_ostream OS(AsmStringIR); const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart(); - std::sort (AsmStrRewrites.begin(), AsmStrRewrites.end(), AsmStringSort); + array_pod_sort (AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort); for (SmallVectorImpl::iterator I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { const char *Loc = (*I).Loc.getPointer(); -- cgit v1.1 From ecb8c2ba6029f02b01b20b110cc1b3b3ea2e1f1c Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 13 Feb 2013 19:22:27 +0000 Subject: MIsched: HazardRecognizers are created for each DAG. Free them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175067 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 7 ++++++- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 513d8a9..ddaf566 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1054,6 +1054,9 @@ public: #endif void reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + delete HazardRec; + Available.clear(); Pending.clear(); CheckPending = false; @@ -1079,7 +1082,8 @@ public: /// PendingFlag set. 
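// The explicit deletes in this change work because each boundary owns the
// recognizer it is handed. A sketch of the same contract spelled with the
// tree's OwningPtr (an alternative shape, not what the commit does; the raw
// pointer is presumably kept so targets can reassign it directly):
struct OwnedBoundary {
  llvm::OwningPtr<ScheduleHazardRecognizer> HazardRec; // one per DAG
  void reset() {
    HazardRec.reset(); // frees the recognizer created for the previous DAG
  }
};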
SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + HazardRec(0) { reset(); } @@ -1223,6 +1227,7 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; + Rem.init(DAG, SchedModel); Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 36dfaa4..ced17b3 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -192,6 +192,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { DAG = static_cast(dag); SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; + Top.init(DAG, SchedModel); Bot.init(DAG, SchedModel); @@ -199,6 +200,8 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); + delete Top.HazardRec; + delete Bot.HazardRec; Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); @@ -683,4 +686,3 @@ void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { Bot.bumpNode(SU); } } - -- cgit v1.1 From f098620095727dd2a823a94a3a8d47108361ad83 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Wed, 13 Feb 2013 20:23:48 +0000 Subject: Clean up LDV, no functionality change. Remove dead functions: renameRegister Move private member variables from LDV to Impl Remove ssp/uwtable from testing case git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175072 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveDebugVariables.cpp | 79 +++++++++----------------------------- lib/CodeGen/LiveDebugVariables.h | 4 -- 2 files changed, 19 insertions(+), 64 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index ecc9978..0b117ac 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -64,8 +64,7 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0), - EmitDone(false), ModifiedMF(false) { +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); } @@ -248,10 +247,6 @@ public: LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); - /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. - void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, - const TargetRegisterInfo *TRI); - /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. bool splitRegister(unsigned OldLocNo, ArrayRef NewRegs); @@ -287,6 +282,11 @@ class LDVImpl { MachineDominatorTree *MDT; const TargetRegisterInfo *TRI; + /// Whether emitDebugValues is called. + bool EmitDone; + /// Whether the machine function is modified during the pass. + bool ModifiedMF; + /// userValues - All allocated UserValue instances. 
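// The LDV cleanup above is a textbook pImpl move: state mutated only by the
// implementation (EmitDone, ModifiedMF) migrates behind the opaque pointer,
// so the public header stops changing. The idiom reduced to essentials
// (names illustrative):
struct AnalysisImpl { void clear(); }; // grows freely without header churn
class Analysis {
  void *pImpl; // opaque handle, owned by Analysis
public:
  Analysis() : pImpl(0) {}
  void releaseMemory() {
    if (pImpl)
      static_cast<AnalysisImpl *>(pImpl)->clear(); // Impl checks invariants
  }
};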
SmallVector userValues; @@ -321,23 +321,26 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps) {} + LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), + ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); - /// clear - Relase all memory. + /// clear - Release all memory. void clear() { DeleteContainerPointers(userValues); userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); + // Make sure we call emitDebugValues if the machine function was modified. + assert((!ModifiedMF || EmitDone) && + "Dbg values are not emitted in LDV"); + EmitDone = false; + ModifiedMF = false; } /// mapVirtReg - Map virtual register to an equivalence class. void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. - void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); - /// splitRegister - Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef NewRegs); @@ -694,6 +697,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { computeIntervals(); DEBUG(print(dbgs())); LS.releaseMemory(); + ModifiedMF = Changed; return Changed; } @@ -702,17 +706,12 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { return false; if (!pImpl) pImpl = new LDVImpl(this); - ModifiedMF = static_cast(pImpl)->runOnMachineFunction(mf); - return ModifiedMF; + return static_cast(pImpl)->runOnMachineFunction(mf); } void LiveDebugVariables::releaseMemory() { - if (pImpl) { + if (pImpl) static_cast(pImpl)->clear(); - // Make sure we call emitDebugValues if the machine function was modified. - assert((!ModifiedMF || EmitDone) && - "Dbg values are not emitted in LDV"); - } } LiveDebugVariables::~LiveDebugVariables() { @@ -720,45 +719,6 @@ LiveDebugVariables::~LiveDebugVariables() { delete static_cast(pImpl); } -void UserValue:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, - const TargetRegisterInfo *TRI) { - for (unsigned i = locations.size(); i; --i) { - unsigned LocNo = i - 1; - MachineOperand &Loc = locations[LocNo]; - if (!Loc.isReg() || Loc.getReg() != OldReg) - continue; - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - Loc.substPhysReg(NewReg, *TRI); - else - Loc.substVirtReg(NewReg, SubIdx, *TRI); - coalesceLocation(LocNo); - } -} - -void LDVImpl:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { - UserValue *UV = lookupVirtReg(OldReg); - if (!UV) - return; - - if (TargetRegisterInfo::isVirtualRegister(NewReg)) - mapVirtReg(NewReg, UV); - if (OldReg != NewReg) - virtRegToEqClass.erase(OldReg); - - do { - UV->renameRegister(OldReg, NewReg, SubIdx, TRI); - UV = UV->getNext(); - } while (UV); -} - -void LiveDebugVariables:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { - if (pImpl) - static_cast(pImpl)->renameRegister(OldReg, NewReg, SubIdx); -} - //===----------------------------------------------------------------------===// // Live Range Splitting //===----------------------------------------------------------------------===// @@ -1017,13 +977,12 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } + EmitDone = true; } void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { - if (pImpl) { + if (pImpl) static_cast(pImpl)->emitDebugValues(VRM); - EmitDone = true; - } } diff --git a/lib/CodeGen/LiveDebugVariables.h 
b/lib/CodeGen/LiveDebugVariables.h index bb4c160..3ce3c39 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -31,10 +31,6 @@ class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; - /// Whether emitDebugValues is called. - bool EmitDone; - /// Whether the machine function is modified during the pass. - bool ModifiedMF; public: static char ID; // Pass identification, replacement for typeid -- cgit v1.1 From 6b9d4617800d9450825f8a4b122a9aeb76f2795f Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Wed, 13 Feb 2013 20:28:27 +0000 Subject: For Mips16, add the optimization where the 16-bit form of addiu sp can be used if the offset fits in 11 bits. This makes use of the fact that the ABI requires sp to be 8-byte aligned, so the actual offset can fit in 8 bits; it will be shifted left and sign-extended before being used. The assembler or direct object emitter will shift the 11-bit signed field right by 3 bits. We don't need to deal with that here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175073 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 15 ++++++++++++--- lib/Target/Mips/Mips16InstrInfo.h | 12 ++++++++++++ lib/Target/Mips/Mips16InstrInfo.td | 22 ++++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 9f27ac3..5a91519 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -184,7 +184,7 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, int64_t Remainder = FrameSize - Base; BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base); if (isInt<16>(-Remainder)) - BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(-Remainder); + BuildAddiuSpImm(MBB, I, DL, -Remainder); else adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); } @@ -225,7 +225,7 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; if (isInt<16>(Remainder)) - BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Remainder); + BuildAddiuSpImm(MBB, I, DL, Remainder); else adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base); @@ -299,7 +299,7 @@ void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> - BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). 
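// The encoding arithmetic behind validSpImm8 (defined a little further on),
// restated: an 8-byte-aligned 11-bit signed offset has three zero low bits,
// so only the top 8 bits need to be encoded; the field is shifted left by 3
// and sign-extended when used. A constant-folded equivalent of the check:
static bool fitsAddiuSp16(int Offset) {
  // Same as ((Offset & 7) == 0) && isInt<11>(Offset).
  return (Offset & 7) == 0 && Offset >= -1024 && Offset <= 1016;
}
// Example: 1016 (0b01111111000) passes; 1020 fails the alignment test, and
// 1024 is outside the signed 11-bit range.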
addImm(Amount); + BuildAddiuSpImm(MBB, I, DL, Amount); else adjustStackPtrBigUnrestricted(SP, Amount, MBB, I); } @@ -400,6 +400,15 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); } +void Mips16InstrInfo::BuildAddiuSpImm( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, int64_t Imm) const { + if (validSpImm8(Imm)) + BuildMI(MBB, II, DL, get(Mips::AddiuSpImm16)).addImm(Imm); + else + BuildMI(MBB, II, DL, get(Mips::AddiuSpImmX16)).addImm(Imm); +} + const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { return new Mips16InstrInfo(TM); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 26a5a5e..f8570bd 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -86,6 +86,18 @@ public: MachineBasicBlock::iterator II, DebugLoc DL, unsigned &NewImm) const; + static bool validSpImm8(int offset) { + return ((offset & 7) == 0) && isInt<11>(offset); + } + + // + // build the proper one based on the Imm field + // + void BuildAddiuSpImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + int64_t Imm) const; + + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index c3e61bb..06344db 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -32,6 +32,21 @@ def mem16_ea : Operand { } // +// +// I8 instruction format +// + +class FI816_ins_base _func, string asmstr, + string asmstr2, InstrItinClass itin>: + FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2), + [], itin>; + + +class FI816_SP_ins _func, string asmstr, + InstrItinClass itin>: + FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>; + +// // RI instruction format // @@ -451,6 +466,13 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; // Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended) // To add a constant to the stack pointer. // +def AddiuSpImm16 + : FI816_SP_ins<0b011, "addiu", IIAlu> { + let Defs = [SP]; + let Uses = [SP]; + let AddedComplexity = 5; +} + def AddiuSpImmX16 : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> { let Defs = [SP]; -- cgit v1.1 From c0a6e070fc8fccb86ed91d503a7efc64a2abaa14 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 13 Feb 2013 21:12:29 +0000 Subject: LoopVectorize: Simplify code for clarity. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175076 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 842ae02..ab1068d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2805,17 +2805,17 @@ unsigned LoopVectorizationCostModel::getWidestType() { continue; // Examine the stored values. - StoreInst *ST = 0; - if ((ST = dyn_cast(it))) + if (StoreInst *ST = dyn_cast(it)) T = ST->getValueOperand()->getType(); // Ignore loaded pointer types and stored pointer types that are not // consecutive. However, we do want to take consecutive stores/loads of // pointer vectors into account. 
- if (T->isPointerTy() && isConsecutiveLoadOrStore(it)) - MaxWidth = std::max(MaxWidth, DL->getPointerSizeInBits()); - else - MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); + if (T->isPointerTy() && !isConsecutiveLoadOrStore(it)) + continue; + + MaxWidth = std::max(MaxWidth, + (unsigned)DL->getTypeSizeInBits(T->getScalarType())); } } @@ -3242,13 +3242,11 @@ namespace llvm { bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { // Check for a store. - StoreInst *ST = dyn_cast(Inst); - if (ST) + if (StoreInst *ST = dyn_cast(Inst)) return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; // Check for a load. - LoadInst *LI = dyn_cast(Inst); - if (LI) + if (LoadInst *LI = dyn_cast(Inst)) return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; return false; -- cgit v1.1 From 19aa3e37dcf09b126bbd83e11ec9059615c6bd8b Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 13 Feb 2013 21:27:17 +0000 Subject: [ms-inline asm] Add a comment about the determinism of the rewrite sort. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175082 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2cce8b0..aa07ecb 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -4030,6 +4030,8 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { return false; } +// We are comparing pointers, but the pointers are relative to a single string. +// Thus, this should always be deterministic. static int RewritesSort (const void *A, const void *B) { const AsmRewrite *AsmRewriteA = static_cast(A); const AsmRewrite *AsmRewriteB = static_cast(B); -- cgit v1.1 From 7b0bc3fe3e8210ec7fffb311443a07d81cbe1308 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 13 Feb 2013 21:33:44 +0000 Subject: [ms-inline-asm] Add support for memory references that have non-immediate displacements. 
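A one-function illustration of why the pointer comparison above is deterministic: both SMLocs point into the same MemoryBuffer, so they share one allocation and '<' reflects source order.

static bool comesFirst(llvm::SMLoc A, llvm::SMLoc B) {
  // Same underlying buffer: well-defined, stable across runs, unlike
  // comparing pointers from unrelated allocations.
  return A.getPointer() < B.getPointer();
}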
rdar://12974533 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175083 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.cpp | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 75fa9d2..ac5daec 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -252,14 +252,15 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } case MachineOperand::MO_Immediate: - O << '$' << MO.getImm(); + if (AsmVariant == 0) O << '$'; + O << MO.getImm(); return; case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: { - O << '$'; + if (AsmVariant == 0) O << '$'; printSymbolOperand(MO, O); break; } @@ -355,19 +356,23 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, NeedPlus = true; } - assert (DispSpec.isImm() && "Displacement is not an immediate!"); - int64_t DispVal = DispSpec.getImm(); - if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { - if (NeedPlus) { - if (DispVal > 0) - O << " + "; - else { - O << " - "; - DispVal = -DispVal; + if (!DispSpec.isImm()) { + if (NeedPlus) O << " + "; + printOperand(MI, Op+3, O, Modifier, AsmVariant); + } else { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } } + O << DispVal; } - O << DispVal; - } + } O << ']'; } -- cgit v1.1 From f6563427c40774953d4a7a0387a000f7a6d94ee1 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Wed, 13 Feb 2013 21:38:46 +0000 Subject: Hexagon: Use absolute addressing mode loads/stores for global+offset instead of redefining separate instructions for them. 
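A condensed sketch of the displacement tail printIntelMemReference now produces; printDisp is a stand-in for the real method, shows only the immediate-versus-symbolic split, and drops the print-zero-when-bare corner case handled above:

static void printDisp(llvm::raw_ostream &O, bool NeedPlus,
                      bool IsImm, int64_t Imm, llvm::StringRef Sym) {
  if (!IsImm) {          // the new case: a non-immediate (symbolic) disp
    if (NeedPlus)
      O << " + ";
    O << Sym;            // the real code defers to printOperand here
    return;
  }
  if (Imm > 0)
    O << (NeedPlus ? " + " : "") << Imm;
  else if (Imm < 0)
    O << (NeedPlus ? " - " : "-") << -Imm;
}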
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175086 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/Hexagon.h | 3 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 111 +++- lib/Target/Hexagon/HexagonInstrInfo.cpp | 127 ----- lib/Target/Hexagon/HexagonInstrInfoV4.td | 754 +++++---------------------- lib/Target/Hexagon/HexagonTargetMachine.cpp | 2 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 279 +--------- 6 files changed, 224 insertions(+), 1052 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 45f857b..7e7f756 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -28,7 +28,8 @@ namespace llvm { class HexagonTargetMachine; class raw_ostream; - FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createHexagonFPMoverPass(TargetMachine &TM); FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 6443cb2..3a1c48b 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -15,18 +15,29 @@ #include "Hexagon.h" #include "HexagonISelLowering.h" #include "HexagonTargetMachine.h" -#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" - using namespace llvm; +static +cl::opt +MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", + cl::Hidden, cl::init(2), + cl::desc("Maximum number of uses of a global address such that we still us a" + "constant extended instruction")); //===----------------------------------------------------------------------===// // Instruction Selector Implementation //===----------------------------------------------------------------------===// +namespace llvm { + void initializeHexagonDAGToDAGISelPass(PassRegistry&); +} + //===--------------------------------------------------------------------===// /// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine /// instructions for SelectionDAG operations. @@ -40,19 +51,24 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { // Keep a reference to HexagonTargetMachine. HexagonTargetMachine& TM; const HexagonInstrInfo *TII; - + DenseMap GlobalAddressUseCountMap; public: - explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine) - : SelectionDAGISel(targetmachine), + explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(targetmachine, OptLevel), Subtarget(targetmachine.getSubtarget()), TM(targetmachine), TII(static_cast(TM.getInstrInfo())) { - + initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } + bool hasNumUsesBelowThresGA(SDNode *N) const; SDNode *Select(SDNode *N); // Complex Pattern Selectors. 
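// For readability: the threshold above is an ordinary cl::opt whose template
// argument this listing has lost. Judging from the init(2) default and the
// unsigned use-count comparison later in the file, it presumably reads:
static llvm::cl::opt<unsigned>
MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
    llvm::cl::Hidden, llvm::cl::init(2),
    llvm::cl::desc("Maximum number of uses of a global address such that we "
                   "still us a constant extended instruction"));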
+ inline bool foldGlobalAddress(SDValue &N, SDValue &R); + inline bool foldGlobalAddressGP(SDValue &N, SDValue &R); + bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP); bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2); bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2); bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2); @@ -113,10 +129,23 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) { /// createHexagonISelDag - This pass converts a legalized DAG into a /// Hexagon-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) { - return new HexagonDAGToDAGISel(TM); +FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new HexagonDAGToDAGISel(TM, OptLevel); } +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "hexagon-isel", + &SelectionDAGISel::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + + static bool IsS11_0_Offset(SDNode * S) { ConstantSDNode *N = cast(S); @@ -1526,3 +1555,69 @@ bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { return (UseCount <= 1); } + +//===--------------------------------------------------------------------===// +// Return 'true' if use count of the global address is below threshold. +//===--------------------------------------------------------------------===// +bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { + assert(N->getOpcode() == ISD::TargetGlobalAddress && + "Expecting a target global address"); + + // Always try to fold the address. + if (TM.getOptLevel() == CodeGenOpt::Aggressive) + return true; + + GlobalAddressSDNode *GA = cast(N); + DenseMap::const_iterator GI = + GlobalAddressUseCountMap.find(GA->getGlobal()); + + if (GI == GlobalAddressUseCountMap.end()) + return false; + + return GI->second <= MaxNumOfUsesForConstExtenders; +} + +//===--------------------------------------------------------------------===// +// Return true if the non GP-relative global address can be folded. +//===--------------------------------------------------------------------===// +inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { + return foldGlobalAddressImpl(N, R, false); +} + +//===--------------------------------------------------------------------===// +// Return true if the GP-relative global address can be folded. +//===--------------------------------------------------------------------===// +inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) { + return foldGlobalAddressImpl(N, R, true); +} + +//===--------------------------------------------------------------------===// +// Fold offset of the global address if number of uses are below threshold. 
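// Likewise, GlobalAddressUseCountMap reads as
// DenseMap<const GlobalValue *, unsigned> (its template arguments were lost
// in this listing). How the pass populates it is outside this excerpt; a
// plausible shape, for orientation only:
void countUses(const GlobalValue *GV,
               llvm::DenseMap<const GlobalValue *, unsigned> &Map) {
  Map[GV] = GV->getNumUses(); // later tested against the cl::opt threshold
}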
+//===--------------------------------------------------------------------===// +bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, + bool ShouldLookForGP) { + if (N.getOpcode() == ISD::ADD) { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) || + (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) { + ConstantSDNode *Const = dyn_cast(N1); + GlobalAddressSDNode *GA = + dyn_cast(N0.getOperand(0)); + + if (Const && GA && + (GA->getOpcode() == ISD::TargetGlobalAddress)) { + if ((N0.getOpcode() == HexagonISD::CONST32) && + !hasNumUsesBelowThresGA(GA)) + return false; + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), + Const->getDebugLoc(), + N.getValueType(), + GA->getOffset() + + (uint64_t)Const->getSExtValue()); + return true; + } + } + } + return false; +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 1005553..6801467 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -690,30 +690,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { case Hexagon::STriw_abs_setimm_V4: // V4 global address load. - case Hexagon::LDrid_GP_cPt_V4 : - case Hexagon::LDrid_GP_cNotPt_V4 : - case Hexagon::LDrid_GP_cdnPt_V4 : - case Hexagon::LDrid_GP_cdnNotPt_V4 : - case Hexagon::LDrib_GP_cPt_V4 : - case Hexagon::LDrib_GP_cNotPt_V4 : - case Hexagon::LDrib_GP_cdnPt_V4 : - case Hexagon::LDrib_GP_cdnNotPt_V4 : - case Hexagon::LDriub_GP_cPt_V4 : - case Hexagon::LDriub_GP_cNotPt_V4 : - case Hexagon::LDriub_GP_cdnPt_V4 : - case Hexagon::LDriub_GP_cdnNotPt_V4 : - case Hexagon::LDrih_GP_cPt_V4 : - case Hexagon::LDrih_GP_cNotPt_V4 : - case Hexagon::LDrih_GP_cdnPt_V4 : - case Hexagon::LDrih_GP_cdnNotPt_V4 : - case Hexagon::LDriuh_GP_cPt_V4 : - case Hexagon::LDriuh_GP_cNotPt_V4 : - case Hexagon::LDriuh_GP_cdnPt_V4 : - case Hexagon::LDriuh_GP_cdnNotPt_V4 : - case Hexagon::LDriw_GP_cPt_V4 : - case Hexagon::LDriw_GP_cNotPt_V4 : - case Hexagon::LDriw_GP_cdnPt_V4 : - case Hexagon::LDriw_GP_cdnNotPt_V4 : case Hexagon::LDd_GP_cPt_V4 : case Hexagon::LDd_GP_cNotPt_V4 : case Hexagon::LDd_GP_cdnPt_V4 : @@ -740,22 +716,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { case Hexagon::LDw_GP_cdnNotPt_V4 : // V4 global address store. - case Hexagon::STrid_GP_cPt_V4 : - case Hexagon::STrid_GP_cNotPt_V4 : - case Hexagon::STrid_GP_cdnPt_V4 : - case Hexagon::STrid_GP_cdnNotPt_V4 : - case Hexagon::STrib_GP_cPt_V4 : - case Hexagon::STrib_GP_cNotPt_V4 : - case Hexagon::STrib_GP_cdnPt_V4 : - case Hexagon::STrib_GP_cdnNotPt_V4 : - case Hexagon::STrih_GP_cPt_V4 : - case Hexagon::STrih_GP_cNotPt_V4 : - case Hexagon::STrih_GP_cdnPt_V4 : - case Hexagon::STrih_GP_cdnNotPt_V4 : - case Hexagon::STriw_GP_cPt_V4 : - case Hexagon::STriw_GP_cNotPt_V4 : - case Hexagon::STriw_GP_cdnPt_V4 : - case Hexagon::STriw_GP_cdnNotPt_V4 : case Hexagon::STd_GP_cPt_V4 : case Hexagon::STd_GP_cNotPt_V4 : case Hexagon::STd_GP_cdnPt_V4 : @@ -774,18 +734,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { case Hexagon::STw_GP_cdnNotPt_V4 : // V4 predicated global address new value store. 
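// The fold above, stated as data flow: (add (CONST32_GP @g), c) collapses
// into a single TargetGlobalAddress whose offset absorbs c. The offset
// arithmetic, isolated (mirrors the getTargetGlobalAddress call; helper name
// illustrative):
static int64_t foldedOffset(int64_t GAOffset, const ConstantSDNode *C) {
  return GAOffset + (uint64_t)C->getSExtValue();
}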
- case Hexagon::STrib_GP_cPt_nv_V4 : - case Hexagon::STrib_GP_cNotPt_nv_V4 : - case Hexagon::STrib_GP_cdnPt_nv_V4 : - case Hexagon::STrib_GP_cdnNotPt_nv_V4 : - case Hexagon::STrih_GP_cPt_nv_V4 : - case Hexagon::STrih_GP_cNotPt_nv_V4 : - case Hexagon::STrih_GP_cdnPt_nv_V4 : - case Hexagon::STrih_GP_cdnNotPt_nv_V4 : - case Hexagon::STriw_GP_cPt_nv_V4 : - case Hexagon::STriw_GP_cNotPt_nv_V4 : - case Hexagon::STriw_GP_cdnPt_nv_V4 : - case Hexagon::STriw_GP_cdnNotPt_nv_V4 : case Hexagon::STb_GP_cPt_nv_V4 : case Hexagon::STb_GP_cNotPt_nv_V4 : case Hexagon::STb_GP_cdnPt_nv_V4 : @@ -1177,7 +1125,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrib_indexed_nv_V4: case Hexagon::STrib_indexed_shl_nv_V4: case Hexagon::STrib_shl_nv_V4: - case Hexagon::STrib_GP_nv_V4: case Hexagon::STb_GP_nv_V4: case Hexagon::POST_STbri_nv_V4: case Hexagon::STrib_cPt_nv_V4: @@ -1200,10 +1147,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STb_GP_cNotPt_nv_V4: case Hexagon::STb_GP_cdnPt_nv_V4: case Hexagon::STb_GP_cdnNotPt_nv_V4: - case Hexagon::STrib_GP_cPt_nv_V4: - case Hexagon::STrib_GP_cNotPt_nv_V4: - case Hexagon::STrib_GP_cdnPt_nv_V4: - case Hexagon::STrib_GP_cdnNotPt_nv_V4: case Hexagon::STrib_abs_nv_V4: case Hexagon::STrib_abs_cPt_nv_V4: case Hexagon::STrib_abs_cdnPt_nv_V4: @@ -1220,7 +1163,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrih_indexed_nv_V4: case Hexagon::STrih_indexed_shl_nv_V4: case Hexagon::STrih_shl_nv_V4: - case Hexagon::STrih_GP_nv_V4: case Hexagon::STh_GP_nv_V4: case Hexagon::POST_SThri_nv_V4: case Hexagon::STrih_cPt_nv_V4: @@ -1243,10 +1185,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STh_GP_cNotPt_nv_V4: case Hexagon::STh_GP_cdnPt_nv_V4: case Hexagon::STh_GP_cdnNotPt_nv_V4: - case Hexagon::STrih_GP_cPt_nv_V4: - case Hexagon::STrih_GP_cNotPt_nv_V4: - case Hexagon::STrih_GP_cdnPt_nv_V4: - case Hexagon::STrih_GP_cdnNotPt_nv_V4: case Hexagon::STrih_abs_nv_V4: case Hexagon::STrih_abs_cPt_nv_V4: case Hexagon::STrih_abs_cdnPt_nv_V4: @@ -1263,7 +1201,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STriw_indexed_nv_V4: case Hexagon::STriw_indexed_shl_nv_V4: case Hexagon::STriw_shl_nv_V4: - case Hexagon::STriw_GP_nv_V4: case Hexagon::STw_GP_nv_V4: case Hexagon::POST_STwri_nv_V4: case Hexagon::STriw_cPt_nv_V4: @@ -1286,10 +1223,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STw_GP_cNotPt_nv_V4: case Hexagon::STw_GP_cdnPt_nv_V4: case Hexagon::STw_GP_cdnNotPt_nv_V4: - case Hexagon::STriw_GP_cPt_nv_V4: - case Hexagon::STriw_GP_cNotPt_nv_V4: - case Hexagon::STriw_GP_cdnPt_nv_V4: - case Hexagon::STriw_GP_cdnNotPt_nv_V4: case Hexagon::STriw_abs_nv_V4: case Hexagon::STriw_abs_cPt_nv_V4: case Hexagon::STriw_abs_cdnPt_nv_V4: @@ -1732,26 +1665,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::STw_GP_cNotPt_V4: return Hexagon::STw_GP_cPt_V4; - case Hexagon::STrid_GP_cPt_V4: - return Hexagon::STrid_GP_cNotPt_V4; - case Hexagon::STrid_GP_cNotPt_V4: - return Hexagon::STrid_GP_cPt_V4; - - case Hexagon::STrib_GP_cPt_V4: - return Hexagon::STrib_GP_cNotPt_V4; - case Hexagon::STrib_GP_cNotPt_V4: - return Hexagon::STrib_GP_cPt_V4; - - case Hexagon::STrih_GP_cPt_V4: - return Hexagon::STrih_GP_cNotPt_V4; - case Hexagon::STrih_GP_cNotPt_V4: - return Hexagon::STrih_GP_cPt_V4; - - case Hexagon::STriw_GP_cPt_V4: - return 
Hexagon::STriw_GP_cNotPt_V4; - case Hexagon::STriw_GP_cNotPt_V4: - return Hexagon::STriw_GP_cPt_V4; - // Load. case Hexagon::LDrid_cPt: return Hexagon::LDrid_cNotPt; @@ -2037,25 +1950,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::LDriw_indexed_shl_cNotPt_V4; // V4 Load from global address - case Hexagon::LDrid_GP_V4: - return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 : - Hexagon::LDrid_GP_cNotPt_V4; - case Hexagon::LDrib_GP_V4: - return !invertPredicate ? Hexagon::LDrib_GP_cPt_V4 : - Hexagon::LDrib_GP_cNotPt_V4; - case Hexagon::LDriub_GP_V4: - return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 : - Hexagon::LDriub_GP_cNotPt_V4; - case Hexagon::LDrih_GP_V4: - return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 : - Hexagon::LDrih_GP_cNotPt_V4; - case Hexagon::LDriuh_GP_V4: - return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 : - Hexagon::LDriuh_GP_cNotPt_V4; - case Hexagon::LDriw_GP_V4: - return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 : - Hexagon::LDriw_GP_cNotPt_V4; - case Hexagon::LDd_GP_V4: return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 : Hexagon::LDd_GP_cNotPt_V4; @@ -2138,19 +2032,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::STrid_indexed_shl_cNotPt_V4; // V4 Store to global address - case Hexagon::STrid_GP_V4: - return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 : - Hexagon::STrid_GP_cNotPt_V4; - case Hexagon::STrib_GP_V4: - return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 : - Hexagon::STrib_GP_cNotPt_V4; - case Hexagon::STrih_GP_V4: - return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 : - Hexagon::STrih_GP_cNotPt_V4; - case Hexagon::STriw_GP_V4: - return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 : - Hexagon::STriw_GP_cNotPt_V4; - case Hexagon::STd_GP_V4: return !invertPredicate ? Hexagon::STd_GP_cPt_V4 : Hexagon::STd_GP_cNotPt_V4; @@ -2867,14 +2748,6 @@ isConditionalStore (const MachineInstr* MI) const { return QRI.Subtarget.hasV4TOps(); // V4 global address store before promoting to dot new. - case Hexagon::STrid_GP_cPt_V4 : - case Hexagon::STrid_GP_cNotPt_V4 : - case Hexagon::STrib_GP_cPt_V4 : - case Hexagon::STrib_GP_cNotPt_V4 : - case Hexagon::STrih_GP_cPt_V4 : - case Hexagon::STrih_GP_cNotPt_V4 : - case Hexagon::STriw_GP_cPt_V4 : - case Hexagon::STriw_GP_cNotPt_V4 : case Hexagon::STd_GP_cPt_V4 : case Hexagon::STd_GP_cNotPt_V4 : case Hexagon::STb_GP_cPt_V4 : diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 4e37b99..169660d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -21,6 +21,17 @@ def IMMEXT_c : T_Immext<(ins calltarget:$imm)>; def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>; def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>; +// Fold (add (CONST32 tglobaladdr:$addr) ) into a global address. +def FoldGlobalAddr : ComplexPattern; + +// Fold (add (CONST32_GP tglobaladdr:$addr) ) into a global address. 
+def FoldGlobalAddrGP : ComplexPattern; + +def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), + (HexagonCONST32 node:$addr), [{ + return hasNumUsesBelowThresGA(N->getOperand(0).getNode()); +}]>; + // Hexagon V4 Architecture spec defines 8 instruction classes: // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the // compiler) @@ -585,226 +596,6 @@ def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), Requires<[HasV4T]>; } -/// Load from global offset - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memd(#$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memd(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memd(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memd(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memd(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memb(#$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memb(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memb(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memb(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memb(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memub(#$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) 
$dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memh(#$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memuh(#$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memw(#$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, 
globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), (ins globaladdress:$global), @@ -1128,82 +919,6 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), (i32 (LDw_GP_V4 tglobaladdr:$global))>, Requires<[HasV4T]>; -def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memub(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memuh(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - - -// Map from load(globaladdress + x) -> memuh(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, 
- Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; // zext i1->i64 def : Pat <(i64 (zext (i1 PredRegs:$src1))), (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, @@ -1649,163 +1364,6 @@ def STriw_shl_V4 : STInst<(outs), // memw(Rx++Mu:brev)=Rt // memw(gp+#u16:2)=Rt -/// store to global address - -let isPredicable = 1, neverHasSideEffects = 1 in -def STrid_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), - "memd(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if ($src1) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if (!$src1) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if ($src1.new) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if (!$src1.new) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def STrib_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memb(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def STrih_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memh(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cNotPt_V4 : 
STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def STriw_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memw(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; // memd(#global)=Rtt let isPredicable = 1, neverHasSideEffects = 1 in @@ -2024,72 +1582,6 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, Requires<[HasV4T]>; -def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i64 DoubleRegs:$src1)), - (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i64 DoubleRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), 
- u16ImmPred:$offset)), - (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - - - //===----------------------------------------------------------------------=== // ST - //===----------------------------------------------------------------------=== @@ -2269,14 +1761,6 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new -// memb(gp+#u16:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memb(#$global+$offset) = $src.new", - []>, - Requires<[HasV4T]>; - // memb(#global)=Nt.new let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_nv_V4 : NVInst_V4<(outs), @@ -2299,14 +1783,6 @@ def STrih_shl_nv_V4 : NVInst_V4<(outs), // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memh(gp+#u16:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memh(#$global+$offset) = $src.new", - []>, - Requires<[HasV4T]>; - // memh(#global)=Nt.new let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_nv_V4 : NVInst_V4<(outs), @@ -2329,12 +1805,6 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), // memw(Rx++Mu)=Nt.new // memw(Rx++Mu:brev)=Nt.new // memw(gp+#u16:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memw(#$global+$offset) = $src.new", - []>, - Requires<[HasV4T]>; let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_nv_V4 : NVInst_V4<(outs), @@ -2439,102 +1909,6 @@ def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins 
PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - //===----------------------------------------------------------------------===// // NV/ST - //===----------------------------------------------------------------------===// @@ -4736,3 +4110,109 @@ def STrih_offset_ext_V4 : STInst<(outs), [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3), (add IntRegs:$src1, u6_1ImmPred:$src2))]>, Requires<[HasV4T]>; +// Map from store(globaladdress + x) -> memd(#foo + x) +let AddedComplexity = 100 in +def : Pat<(store (i64 DoubleRegs:$src1), + FoldGlobalAddrGP:$addr), + (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr, + (i64 DoubleRegs:$src1)), + (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), + (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), + (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), + (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), + (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map 
from store(globaladdress + x) -> memw(#foo + x) +let AddedComplexity = 100 in +def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), + (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), + (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memd(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i64 (load FoldGlobalAddrGP:$addr)), + (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr), + (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)), + (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)), + (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +//let AddedComplexity = 100 in +let AddedComplexity = 100 in +def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)), + (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)), + (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memuh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)), + (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr), + (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memub(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)), + (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr), + (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memw(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (load FoldGlobalAddrGP:$addr)), + (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr), + (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 287b3d6..d9fef3e 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -122,7 +122,7 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { bool HexagonPassConfig::addInstSelector() { addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); - addPass(createHexagonISelDag(getHexagonTargetMachine())); + addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel())); addPass(createHexagonPeephole()); return false; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 2ab6dee..aff6b86 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -376,7 +376,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STrib_indexed: case 
Hexagon::STrib_indexed_shl_V4: case Hexagon::STrib_shl_V4: - case Hexagon::STrib_GP_V4: case Hexagon::STb_GP_V4: case Hexagon::POST_STbri: case Hexagon::STrib_cPt: @@ -399,17 +398,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STb_GP_cNotPt_V4: case Hexagon::STb_GP_cdnPt_V4: case Hexagon::STb_GP_cdnNotPt_V4: - case Hexagon::STrib_GP_cPt_V4: - case Hexagon::STrib_GP_cNotPt_V4: - case Hexagon::STrib_GP_cdnPt_V4: - case Hexagon::STrib_GP_cdnNotPt_V4: // store halfword case Hexagon::STrih: case Hexagon::STrih_indexed: case Hexagon::STrih_indexed_shl_V4: case Hexagon::STrih_shl_V4: - case Hexagon::STrih_GP_V4: case Hexagon::STh_GP_V4: case Hexagon::POST_SThri: case Hexagon::STrih_cPt: @@ -432,17 +426,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STh_GP_cNotPt_V4: case Hexagon::STh_GP_cdnPt_V4: case Hexagon::STh_GP_cdnNotPt_V4: - case Hexagon::STrih_GP_cPt_V4: - case Hexagon::STrih_GP_cNotPt_V4: - case Hexagon::STrih_GP_cdnPt_V4: - case Hexagon::STrih_GP_cdnNotPt_V4: // store word case Hexagon::STriw: case Hexagon::STriw_indexed: case Hexagon::STriw_indexed_shl_V4: case Hexagon::STriw_shl_V4: - case Hexagon::STriw_GP_V4: case Hexagon::STw_GP_V4: case Hexagon::POST_STwri: case Hexagon::STriw_cPt: @@ -465,10 +454,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STw_GP_cNotPt_V4: case Hexagon::STw_GP_cdnPt_V4: case Hexagon::STw_GP_cdnNotPt_V4: - case Hexagon::STriw_GP_cPt_V4: - case Hexagon::STriw_GP_cNotPt_V4: - case Hexagon::STriw_GP_cdnPt_V4: - case Hexagon::STriw_GP_cdnNotPt_V4: return QRI->Subtarget.hasV4TOps(); } return false; @@ -508,9 +493,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STrib_shl_V4: return Hexagon::STrib_shl_nv_V4; - case Hexagon::STrib_GP_V4: - return Hexagon::STrib_GP_nv_V4; - case Hexagon::STb_GP_V4: return Hexagon::STb_GP_nv_V4; @@ -577,18 +559,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STb_GP_cdnNotPt_V4: return Hexagon::STb_GP_cdnNotPt_nv_V4; - case Hexagon::STrib_GP_cPt_V4: - return Hexagon::STrib_GP_cPt_nv_V4; - - case Hexagon::STrib_GP_cNotPt_V4: - return Hexagon::STrib_GP_cNotPt_nv_V4; - - case Hexagon::STrib_GP_cdnPt_V4: - return Hexagon::STrib_GP_cdnPt_nv_V4; - - case Hexagon::STrib_GP_cdnNotPt_V4: - return Hexagon::STrib_GP_cdnNotPt_nv_V4; - // store new value halfword case Hexagon::STrih: return Hexagon::STrih_nv_V4; @@ -602,9 +572,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STrih_shl_V4: return Hexagon::STrih_shl_nv_V4; - case Hexagon::STrih_GP_V4: - return Hexagon::STrih_GP_nv_V4; - case Hexagon::STh_GP_V4: return Hexagon::STh_GP_nv_V4; @@ -671,18 +638,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STh_GP_cdnNotPt_V4: return Hexagon::STh_GP_cdnNotPt_nv_V4; - case Hexagon::STrih_GP_cPt_V4: - return Hexagon::STrih_GP_cPt_nv_V4; - - case Hexagon::STrih_GP_cNotPt_V4: - return Hexagon::STrih_GP_cNotPt_nv_V4; - - case Hexagon::STrih_GP_cdnPt_V4: - return Hexagon::STrih_GP_cdnPt_nv_V4; - - case Hexagon::STrih_GP_cdnNotPt_V4: - return Hexagon::STrih_GP_cdnNotPt_nv_V4; - // store new value word case Hexagon::STriw: return Hexagon::STriw_nv_V4; @@ -696,9 +651,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STriw_shl_V4: return Hexagon::STriw_shl_nv_V4; - case Hexagon::STriw_GP_V4: - return Hexagon::STriw_GP_nv_V4; - case Hexagon::STw_GP_V4: return Hexagon::STw_GP_nv_V4; @@ -765,17 +717,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STw_GP_cdnNotPt_V4: return 
Hexagon::STw_GP_cdnNotPt_nv_V4; - case Hexagon::STriw_GP_cPt_V4: - return Hexagon::STriw_GP_cPt_nv_V4; - - case Hexagon::STriw_GP_cNotPt_V4: - return Hexagon::STriw_GP_cNotPt_nv_V4; - - case Hexagon::STriw_GP_cdnPt_V4: - return Hexagon::STriw_GP_cdnPt_nv_V4; - - case Hexagon::STriw_GP_cdnNotPt_V4: - return Hexagon::STriw_GP_cdnNotPt_nv_V4; } } @@ -821,12 +762,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STb_GP_cNotPt_V4 : return Hexagon::STb_GP_cdnNotPt_V4; - case Hexagon::STrib_GP_cPt_V4 : - return Hexagon::STrib_GP_cdnPt_V4; - - case Hexagon::STrib_GP_cNotPt_V4 : - return Hexagon::STrib_GP_cdnNotPt_V4; - // Store doubleword conditionally case Hexagon::STrid_cPt : return Hexagon::STrid_cdnPt_V4; @@ -858,12 +793,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STd_GP_cNotPt_V4 : return Hexagon::STd_GP_cdnNotPt_V4; - case Hexagon::STrid_GP_cPt_V4 : - return Hexagon::STrid_GP_cdnPt_V4; - - case Hexagon::STrid_GP_cNotPt_V4 : - return Hexagon::STrid_GP_cdnNotPt_V4; - // Store halfword conditionally case Hexagon::STrih_cPt : return Hexagon::STrih_cdnPt_V4; @@ -901,12 +830,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STh_GP_cNotPt_V4 : return Hexagon::STh_GP_cdnNotPt_V4; - case Hexagon::STrih_GP_cPt_V4 : - return Hexagon::STrih_GP_cdnPt_V4; - - case Hexagon::STrih_GP_cNotPt_V4 : - return Hexagon::STrih_GP_cdnNotPt_V4; - // Store word conditionally case Hexagon::STriw_cPt : return Hexagon::STriw_cdnPt_V4; @@ -944,12 +867,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STw_GP_cNotPt_V4 : return Hexagon::STw_GP_cdnNotPt_V4; - case Hexagon::STriw_GP_cPt_V4 : - return Hexagon::STriw_GP_cdnPt_V4; - - case Hexagon::STriw_GP_cNotPt_V4 : - return Hexagon::STriw_GP_cdnNotPt_V4; - // Condtional Jumps case Hexagon::JMP_c: return Hexagon::JMP_cdnPt; @@ -1166,42 +1083,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::LDw_GP_cNotPt_V4: return Hexagon::LDw_GP_cdnNotPt_V4; - case Hexagon::LDrid_GP_cPt_V4: - return Hexagon::LDrid_GP_cdnPt_V4; - - case Hexagon::LDrid_GP_cNotPt_V4: - return Hexagon::LDrid_GP_cdnNotPt_V4; - - case Hexagon::LDrib_GP_cPt_V4: - return Hexagon::LDrib_GP_cdnPt_V4; - - case Hexagon::LDrib_GP_cNotPt_V4: - return Hexagon::LDrib_GP_cdnNotPt_V4; - - case Hexagon::LDriub_GP_cPt_V4: - return Hexagon::LDriub_GP_cdnPt_V4; - - case Hexagon::LDriub_GP_cNotPt_V4: - return Hexagon::LDriub_GP_cdnNotPt_V4; - - case Hexagon::LDrih_GP_cPt_V4: - return Hexagon::LDrih_GP_cdnPt_V4; - - case Hexagon::LDrih_GP_cNotPt_V4: - return Hexagon::LDrih_GP_cdnNotPt_V4; - - case Hexagon::LDriuh_GP_cPt_V4: - return Hexagon::LDriuh_GP_cdnPt_V4; - - case Hexagon::LDriuh_GP_cNotPt_V4: - return Hexagon::LDriuh_GP_cdnNotPt_V4; - - case Hexagon::LDriw_GP_cPt_V4: - return Hexagon::LDriw_GP_cdnPt_V4; - - case Hexagon::LDriw_GP_cNotPt_V4: - return Hexagon::LDriw_GP_cdnNotPt_V4; - // Conditional store new-value byte case Hexagon::STrib_cPt_nv_V4 : return Hexagon::STrib_cdnPt_nv_V4; @@ -1229,12 +1110,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STb_GP_cNotPt_nv_V4 : return Hexagon::STb_GP_cdnNotPt_nv_V4; - case Hexagon::STrib_GP_cPt_nv_V4 : - return Hexagon::STrib_GP_cdnPt_nv_V4; - - case Hexagon::STrib_GP_cNotPt_nv_V4 : - return Hexagon::STrib_GP_cdnNotPt_nv_V4; - // Conditional store new-value halfword case Hexagon::STrih_cPt_nv_V4 : return Hexagon::STrih_cdnPt_nv_V4; @@ -1262,12 +1137,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STh_GP_cNotPt_nv_V4 : return Hexagon::STh_GP_cdnNotPt_nv_V4; - case 
Hexagon::STrih_GP_cPt_nv_V4 : - return Hexagon::STrih_GP_cdnPt_nv_V4; - - case Hexagon::STrih_GP_cNotPt_nv_V4 : - return Hexagon::STrih_GP_cdnNotPt_nv_V4; - // Conditional store new-value word case Hexagon::STriw_cPt_nv_V4 : return Hexagon::STriw_cdnPt_nv_V4; @@ -1295,12 +1164,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STw_GP_cNotPt_nv_V4 : return Hexagon::STw_GP_cdnNotPt_nv_V4; - case Hexagon::STriw_GP_cPt_nv_V4 : - return Hexagon::STriw_GP_cdnPt_nv_V4; - - case Hexagon::STriw_GP_cNotPt_nv_V4 : - return Hexagon::STriw_GP_cdnNotPt_nv_V4; - // Conditional add case Hexagon::ADD_ri_cPt : return Hexagon::ADD_ri_cdnPt; @@ -1661,42 +1524,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::LDw_GP_cdnNotPt_V4: return Hexagon::LDw_GP_cNotPt_V4; - case Hexagon::LDrid_GP_cdnPt_V4: - return Hexagon::LDrid_GP_cPt_V4; - - case Hexagon::LDrid_GP_cdnNotPt_V4: - return Hexagon::LDrid_GP_cNotPt_V4; - - case Hexagon::LDrib_GP_cdnPt_V4: - return Hexagon::LDrib_GP_cPt_V4; - - case Hexagon::LDrib_GP_cdnNotPt_V4: - return Hexagon::LDrib_GP_cNotPt_V4; - - case Hexagon::LDriub_GP_cdnPt_V4: - return Hexagon::LDriub_GP_cPt_V4; - - case Hexagon::LDriub_GP_cdnNotPt_V4: - return Hexagon::LDriub_GP_cNotPt_V4; - - case Hexagon::LDrih_GP_cdnPt_V4: - return Hexagon::LDrih_GP_cPt_V4; - - case Hexagon::LDrih_GP_cdnNotPt_V4: - return Hexagon::LDrih_GP_cNotPt_V4; - - case Hexagon::LDriuh_GP_cdnPt_V4: - return Hexagon::LDriuh_GP_cPt_V4; - - case Hexagon::LDriuh_GP_cdnNotPt_V4: - return Hexagon::LDriuh_GP_cNotPt_V4; - - case Hexagon::LDriw_GP_cdnPt_V4: - return Hexagon::LDriw_GP_cPt_V4; - - case Hexagon::LDriw_GP_cdnNotPt_V4: - return Hexagon::LDriw_GP_cNotPt_V4; - // Conditional add case Hexagon::ADD_ri_cdnPt : @@ -1830,16 +1657,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STb_GP_cNotPt_nv_V4: return Hexagon::STb_GP_cNotPt_V4; - case Hexagon::STrib_GP_cdnPt_nv_V4: - case Hexagon::STrib_GP_cdnPt_V4: - case Hexagon::STrib_GP_cPt_nv_V4: - return Hexagon::STrib_GP_cPt_V4; - - case Hexagon::STrib_GP_cdnNotPt_nv_V4: - case Hexagon::STrib_GP_cdnNotPt_V4: - case Hexagon::STrib_GP_cNotPt_nv_V4: - return Hexagon::STrib_GP_cNotPt_V4; - // Store new-value byte - unconditional case Hexagon::STrib_nv_V4: return Hexagon::STrib; @@ -1853,9 +1670,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STrib_shl_nv_V4: return Hexagon::STrib_shl_V4; - case Hexagon::STrib_GP_nv_V4: - return Hexagon::STrib_GP_V4; - case Hexagon::STb_GP_nv_V4: return Hexagon::STb_GP_V4; @@ -1919,16 +1733,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STh_GP_cNotPt_nv_V4: return Hexagon::STh_GP_cNotPt_V4; - case Hexagon::STrih_GP_cdnPt_nv_V4: - case Hexagon::STrih_GP_cdnPt_V4: - case Hexagon::STrih_GP_cPt_nv_V4: - return Hexagon::STrih_GP_cPt_V4; - - case Hexagon::STrih_GP_cdnNotPt_nv_V4: - case Hexagon::STrih_GP_cdnNotPt_V4: - case Hexagon::STrih_GP_cNotPt_nv_V4: - return Hexagon::STrih_GP_cNotPt_V4; - // Store new-value halfword - unconditional case Hexagon::STrih_nv_V4: @@ -1943,9 +1747,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STrih_shl_nv_V4: return Hexagon::STrih_shl_V4; - case Hexagon::STrih_GP_nv_V4: - return Hexagon::STrih_GP_V4; - case Hexagon::STh_GP_nv_V4: return Hexagon::STh_GP_V4; @@ -2010,16 +1811,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STw_GP_cNotPt_nv_V4: return Hexagon::STw_GP_cNotPt_V4; - case Hexagon::STriw_GP_cdnPt_nv_V4: - case Hexagon::STriw_GP_cdnPt_V4: - case Hexagon::STriw_GP_cPt_nv_V4: - return Hexagon::STriw_GP_cPt_V4; - - case 
Hexagon::STriw_GP_cdnNotPt_nv_V4: - case Hexagon::STriw_GP_cdnNotPt_V4: - case Hexagon::STriw_GP_cNotPt_nv_V4: - return Hexagon::STriw_GP_cNotPt_V4; - // Store new-value word - unconditional case Hexagon::STriw_nv_V4: @@ -2034,9 +1825,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STriw_shl_nv_V4: return Hexagon::STriw_shl_V4; - case Hexagon::STriw_GP_nv_V4: - return Hexagon::STriw_GP_V4; - case Hexagon::STw_GP_nv_V4: return Hexagon::STw_GP_V4; @@ -2075,11 +1863,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STd_GP_cdnNotPt_V4 : return Hexagon::STd_GP_cNotPt_V4; - case Hexagon::STrid_GP_cdnPt_V4 : - return Hexagon::STrid_GP_cPt_V4; - - case Hexagon::STrid_GP_cdnNotPt_V4 : - return Hexagon::STrid_GP_cNotPt_V4; } } @@ -2215,42 +1998,22 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::ZXTB_cdnPt_V4 : case Hexagon::ZXTH_cPt_V4 : case Hexagon::ZXTH_cdnPt_V4 : - case Hexagon::LDrid_GP_cPt_V4 : - case Hexagon::LDrib_GP_cPt_V4 : - case Hexagon::LDriub_GP_cPt_V4 : - case Hexagon::LDrih_GP_cPt_V4 : - case Hexagon::LDriuh_GP_cPt_V4 : - case Hexagon::LDriw_GP_cPt_V4 : case Hexagon::LDd_GP_cPt_V4 : case Hexagon::LDb_GP_cPt_V4 : case Hexagon::LDub_GP_cPt_V4 : case Hexagon::LDh_GP_cPt_V4 : case Hexagon::LDuh_GP_cPt_V4 : case Hexagon::LDw_GP_cPt_V4 : - case Hexagon::STrid_GP_cPt_V4 : - case Hexagon::STrib_GP_cPt_V4 : - case Hexagon::STrih_GP_cPt_V4 : - case Hexagon::STriw_GP_cPt_V4 : case Hexagon::STd_GP_cPt_V4 : case Hexagon::STb_GP_cPt_V4 : case Hexagon::STh_GP_cPt_V4 : case Hexagon::STw_GP_cPt_V4 : - case Hexagon::LDrid_GP_cdnPt_V4 : - case Hexagon::LDrib_GP_cdnPt_V4 : - case Hexagon::LDriub_GP_cdnPt_V4 : - case Hexagon::LDrih_GP_cdnPt_V4 : - case Hexagon::LDriuh_GP_cdnPt_V4 : - case Hexagon::LDriw_GP_cdnPt_V4 : case Hexagon::LDd_GP_cdnPt_V4 : case Hexagon::LDb_GP_cdnPt_V4 : case Hexagon::LDub_GP_cdnPt_V4 : case Hexagon::LDh_GP_cdnPt_V4 : case Hexagon::LDuh_GP_cdnPt_V4 : case Hexagon::LDw_GP_cdnPt_V4 : - case Hexagon::STrid_GP_cdnPt_V4 : - case Hexagon::STrib_GP_cdnPt_V4 : - case Hexagon::STrih_GP_cdnPt_V4 : - case Hexagon::STriw_GP_cdnPt_V4 : case Hexagon::STd_GP_cdnPt_V4 : case Hexagon::STb_GP_cdnPt_V4 : case Hexagon::STh_GP_cdnPt_V4 : @@ -2375,42 +2138,22 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::ZXTH_cNotPt_V4 : case Hexagon::ZXTH_cdnNotPt_V4 : - case Hexagon::LDrid_GP_cNotPt_V4 : - case Hexagon::LDrib_GP_cNotPt_V4 : - case Hexagon::LDriub_GP_cNotPt_V4 : - case Hexagon::LDrih_GP_cNotPt_V4 : - case Hexagon::LDriuh_GP_cNotPt_V4 : - case Hexagon::LDriw_GP_cNotPt_V4 : case Hexagon::LDd_GP_cNotPt_V4 : case Hexagon::LDb_GP_cNotPt_V4 : case Hexagon::LDub_GP_cNotPt_V4 : case Hexagon::LDh_GP_cNotPt_V4 : case Hexagon::LDuh_GP_cNotPt_V4 : case Hexagon::LDw_GP_cNotPt_V4 : - case Hexagon::STrid_GP_cNotPt_V4 : - case Hexagon::STrib_GP_cNotPt_V4 : - case Hexagon::STrih_GP_cNotPt_V4 : - case Hexagon::STriw_GP_cNotPt_V4 : case Hexagon::STd_GP_cNotPt_V4 : case Hexagon::STb_GP_cNotPt_V4 : case Hexagon::STh_GP_cNotPt_V4 : case Hexagon::STw_GP_cNotPt_V4 : - case Hexagon::LDrid_GP_cdnNotPt_V4 : - case Hexagon::LDrib_GP_cdnNotPt_V4 : - case Hexagon::LDriub_GP_cdnNotPt_V4 : - case Hexagon::LDrih_GP_cdnNotPt_V4 : - case Hexagon::LDriuh_GP_cdnNotPt_V4 : - case Hexagon::LDriw_GP_cdnNotPt_V4 : case Hexagon::LDd_GP_cdnNotPt_V4 : case Hexagon::LDb_GP_cdnNotPt_V4 : case Hexagon::LDub_GP_cdnNotPt_V4 : case Hexagon::LDh_GP_cdnNotPt_V4 : case Hexagon::LDuh_GP_cdnNotPt_V4 : case Hexagon::LDw_GP_cdnNotPt_V4 : - case Hexagon::STrid_GP_cdnNotPt_V4 : - case 
Hexagon::STrib_GP_cdnNotPt_V4 : - case Hexagon::STrih_GP_cdnNotPt_V4 : - case Hexagon::STriw_GP_cdnNotPt_V4 : case Hexagon::STd_GP_cdnNotPt_V4 : case Hexagon::STb_GP_cdnNotPt_V4 : case Hexagon::STh_GP_cdnNotPt_V4 : @@ -2572,27 +2315,7 @@ bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { case Hexagon::LDuh_GP_cdnNotPt_V4: case Hexagon::LDw_GP_cdnPt_V4: case Hexagon::LDw_GP_cdnNotPt_V4: - case Hexagon::LDrid_GP_cdnPt_V4: - case Hexagon::LDrid_GP_cdnNotPt_V4: - case Hexagon::LDrib_GP_cdnPt_V4: - case Hexagon::LDrib_GP_cdnNotPt_V4: - case Hexagon::LDriub_GP_cdnPt_V4: - case Hexagon::LDriub_GP_cdnNotPt_V4: - case Hexagon::LDrih_GP_cdnPt_V4: - case Hexagon::LDrih_GP_cdnNotPt_V4: - case Hexagon::LDriuh_GP_cdnPt_V4: - case Hexagon::LDriuh_GP_cdnNotPt_V4: - case Hexagon::LDriw_GP_cdnPt_V4: - case Hexagon::LDriw_GP_cdnNotPt_V4: - - case Hexagon::STrid_GP_cdnPt_V4: - case Hexagon::STrid_GP_cdnNotPt_V4: - case Hexagon::STrib_GP_cdnPt_V4: - case Hexagon::STrib_GP_cdnNotPt_V4: - case Hexagon::STrih_GP_cdnPt_V4: - case Hexagon::STrih_GP_cdnNotPt_V4: - case Hexagon::STriw_GP_cdnPt_V4: - case Hexagon::STriw_GP_cdnNotPt_V4: + case Hexagon::STd_GP_cdnPt_V4: case Hexagon::STd_GP_cdnNotPt_V4: case Hexagon::STb_GP_cdnPt_V4: -- cgit v1.1 From 3019fbbe6ab4c23a5a580f0cc6ba1ba1b124e1da Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Wed, 13 Feb 2013 21:43:02 +0000 Subject: Bug fix 13622: Add paired register support for inline asm with 64-bit data on ARM git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175088 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 15 ++-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 141 +++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 986dfb7..58c7798 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -342,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); + if(ARM::GPRPairRegClass.contains(Reg)) { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + Reg = TRI->getSubReg(Reg, ARM::gsub_0); + } O << ARMInstPrinter::getRegisterName(Reg); break; } @@ -530,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterClass &RC = ARM::GPRRegClass; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - - unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); - RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. 
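For reference, the kind of source this is aimed at is 64-bit inline assembly along the lines of the sketch below (illustrative only; the function and variable names are invented, and this is not from the test suite). In ARM mode, ldrexd/strexd require an even/odd register pair, and the GCC 'H' operand modifier ("%H0" in C, rendered into the asm string by the frontend) names the higher-numbered register of the pair holding a 64-bit operand; that modifier is what the new selection code looks for before forcing a GPRPair register class.

    // Minimal sketch of the PR13622 use case (assumed shape, names invented).
    unsigned long long load64(volatile unsigned long long *p) {
      unsigned long long v;
      asm volatile("ldrexd %0, %H0, [%1]"  // %0 = low register, %H0 = high
                   : "=&r"(v)              // 64-bit output in a register pair
                   : "r"(p));
      return v;
    }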
- - unsigned Reg = RC.getRegister(RegIdx); + unsigned Reg = MO.getReg(); + if(!ARM::GPRPairRegClass.contains(Reg)) + return false; + Reg = TRI->getSubReg(Reg, ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); return false; } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 939bed7..a83f052 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/CallingConv.h" @@ -257,6 +258,8 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -2552,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case ISD::XOR: { // Select special operations if XOR node forms integer ABS pattern SDNode *ResNode = SelectABSOp(N); @@ -3446,6 +3455,138 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + ExternalSymbolSDNode *S = dyn_cast( + N->getOperand(InlineAsm::Op_AsmString)); + StringRef AsmString = StringRef(S->getSymbol()); + + // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. + // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require + // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs + // respectively. Since there is no constraint to explicitly specify a + // reg pair, we search %H operand inside the asm string. If it is found, the + // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. + if (AsmString.find(":H}") == StringRef::npos) + return NULL; + + DebugLoc dl = N->getDebugLoc(); + SDValue Glue = N->getOperand(NumOps-1); + + // Glue node will be appended late. + for(unsigned i = 0; i < NumOps -1; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + continue; + + assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast(V0)->getReg(); + unsigned Reg1 = cast(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. 
+ + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + GU = T1.getNode(); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. + i += 2; + } + } + + AsmNodeOperands.push_back(Glue); + if (!Changed) + return NULL; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], + AsmNodeOperands.size()); + New->setNodeId(-1); + return New.getNode(); +} + + bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector &OutOps) { -- cgit v1.1 From 48aaf5fd02bbdcde0935929f0c5bf08b1f8f20b8 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 13 Feb 2013 21:59:15 +0000 Subject: Don't build tail calls to functions with three inreg arguments on x86-32 PIC. Fixes PR15250! 
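A hypothetical reduction of the problem (names invented; build for i386 with -fPIC): with regparm(3) the three arguments occupy EAX, EDX and ECX, and under PIC the address computation for the callee still needs a register of its own after the callee-saved registers have been restored, so the call below must not be lowered as a sibcall. The change caps tail calls at two inreg arguments when the relocation model is PIC.

    // Sketch only; 'callee' and 'caller' are illustrative names.
    extern int callee(int a, int b, int c) __attribute__((regparm(3)));

    __attribute__((regparm(3)))
    int caller(int a, int b, int c) {
      return callee(a, b, c + 1);  // a sibcall candidate before this change
    }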
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175092 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bc29df49..55de3d3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2814,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) return false; @@ -2853,7 +2853,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // An stdcall caller is expected to clean up its arguments; the callee // isn't going to do that. - if (!CCMatch && CallerCC==CallingConv::X86_StdCall) + if (!CCMatch && CallerCC == CallingConv::X86_StdCall) return false; // Do not sibcall optimize vararg calls unless all arguments are passed via @@ -2973,9 +2973,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // callee-saved registers are restored. These happen to be the same // registers used to pass 'inreg' arguments so watch out for those. if (!Subtarget->is64Bit() && - !isa(Callee) && - !isa(Callee)) { + ((!isa(Callee) && + !isa(Callee)) || + getTargetMachine().getRelocationModel() == Reloc::PIC_)) { unsigned NumInRegs = 0; + // In PIC we need an extra register to formulate the address computation + // for the callee. + unsigned MaxInRegs = + (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) @@ -2984,7 +2990,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: - if (++NumInRegs == 3) + if (++NumInRegs == MaxInRegs) return false; break; } -- cgit v1.1 From 76308d8d287e427cce8ea4374acc8e78648dcf79 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 13 Feb 2013 22:05:20 +0000 Subject: R600: Add support for 128-bit parameters NOTE: This is a candidate for the Mesa stable branch. 
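For illustration (invented names): this covers kernels that take a 128-bit argument by value, for example an OpenCL int4, which is fetched from the parameter buffer with a single 128-bit vertex read. A rough C++ analogue of the shape of such a parameter:

    // Rough analogue only; the OpenCL form would be something like
    //   __kernel void copy(int4 v, __global int4 *out) { *out = v; }
    struct alignas(16) Int4 { int x, y, z, w; };  // one 128-bit value

    Int4 copy(Int4 v) { return v; }  // 'v' loads as a single v4i32 parameter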
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175096 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 1 + lib/Target/R600/R600Instructions.td | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 01df808..2171f90 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -161,6 +161,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::VTX_READ_PARAM_8_eg: case AMDGPU::VTX_READ_PARAM_16_eg: case AMDGPU::VTX_READ_PARAM_32_eg: + case AMDGPU::VTX_READ_PARAM_128_eg: case AMDGPU::VTX_READ_GLOBAL_8_eg: case AMDGPU::VTX_READ_GLOBAL_32_eg: case AMDGPU::VTX_READ_GLOBAL_128_eg: diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 373a793..c9885a3 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1491,6 +1491,10 @@ def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] >; +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))] +>; + //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// -- cgit v1.1 From 666e0d3bc4a28540c1aa73b24573af70d1d46aea Mon Sep 17 00:00:00 2001 From: Anshuman Dasgupta Date: Wed, 13 Feb 2013 22:56:34 +0000 Subject: Hexagon: add support for predicate-GPR copies. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175102 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6801467..d30cdda 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -426,6 +426,18 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); return; } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + Hexagon::PredRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } llvm_unreachable("Unimplemented"); } -- cgit v1.1 From 17fe48ce6ee694eb1fb29804f4cba3dbea5e9248 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 13 Feb 2013 23:00:51 +0000 Subject: Retain the name of the new internal global that's been shrunk. It's possible (e.g. after an LTO build) that an internal global may be used for debugging purposes. If that's the case appending a '.b' to it makes it hard to find that variable. Steal the name from the old GV before deleting it so that they can find that variable again. 
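In miniature, and with an invented global name, the new behavior is just this (a self-contained sketch against the C++ API; the shrinking analysis itself is simplified away):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("m", Ctx);
      Type *I32 = Type::getInt32Ty(Ctx);
      GlobalVariable *GV =
          new GlobalVariable(M, I32, /*isConstant=*/false,
                             GlobalValue::InternalLinkage,
                             ConstantInt::get(I32, 0), "flag");
      // TryToShrinkGlobalToBoolean builds the replacement under a ".b" name...
      GlobalVariable *NewGV =
          new GlobalVariable(M, Type::getInt1Ty(Ctx), false,
                             GlobalValue::InternalLinkage,
                             ConstantInt::getFalse(Ctx), GV->getName() + ".b");
      // ...and now steals the original name before deleting the old global,
      // so the i1 ends up named "flag" rather than "flag.b".
      NewGV->takeName(GV);
      GV->eraseFromParent();
      return 0;
    }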
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c753e2a..2b9d667 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1846,10 +1846,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { bool StoringOther = SI->getOperand(0) == OtherVal; // Only do this if we weren't storing a loaded value. Value *StoreVal; - if (StoringOther || SI->getOperand(0) == InitVal) + if (StoringOther || SI->getOperand(0) == InitVal) { StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), StoringOther); - else { + } else { // Otherwise, we are storing a previously loaded copy. To do this, // change the copy from copying the original value to just copying the // bool. @@ -1888,6 +1888,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { UI->eraseFromParent(); } + // Retain the name of the old global variable. People who are debugging their + // programs may expect these variables to be named the same. + NewGV->takeName(GV); GV->eraseFromParent(); return true; } -- cgit v1.1 From c0c2816fb3d137c096d0bd20b8ad2d92ce25a976 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Wed, 13 Feb 2013 23:24:40 +0000 Subject: temporarily revert the patch due to some conflicts git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175107 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 15 ++-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 141 ------------------------------------- 2 files changed, 6 insertions(+), 150 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 58c7798..986dfb7 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -342,11 +342,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); - if(ARM::GPRPairRegClass.contains(Reg)) { - const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - Reg = TRI->getSubReg(Reg, ARM::gsub_0); - } O << ARMInstPrinter::getRegisterName(Reg); break; } @@ -535,12 +530,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; + const TargetRegisterClass &RC = ARM::GPRRegClass; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - unsigned Reg = MO.getReg(); - if(!ARM::GPRPairRegClass.contains(Reg)) - return false; - Reg = TRI->getSubReg(Reg, ARM::gsub_1); + + unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); + RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. 
+ + unsigned Reg = RC.getRegister(RegIdx); O << ARMInstPrinter::getRegisterName(Reg); return false; } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a83f052..939bed7 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/CallingConv.h" @@ -258,8 +257,6 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); - SDNode *SelectInlineAsm(SDNode *N); - SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -2555,12 +2552,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; - case ISD::INLINEASM: { - SDNode *ResNode = SelectInlineAsm(N); - if (ResNode) - return ResNode; - break; - } case ISD::XOR: { // Select special operations if XOR node forms integer ABS pattern SDNode *ResNode = SelectABSOp(N); @@ -3455,138 +3446,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } -SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ - std::vector AsmNodeOperands; - unsigned Flag, Kind; - bool Changed = false; - unsigned NumOps = N->getNumOperands(); - - ExternalSymbolSDNode *S = dyn_cast( - N->getOperand(InlineAsm::Op_AsmString)); - StringRef AsmString = StringRef(S->getSymbol()); - - // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. - // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require - // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs - // respectively. Since there is no constraint to explicitly specify a - // reg pair, we search %H operand inside the asm string. If it is found, the - // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. - if (AsmString.find(":H}") == StringRef::npos) - return NULL; - - DebugLoc dl = N->getDebugLoc(); - SDValue Glue = N->getOperand(NumOps-1); - - // Glue node will be appended late. - for(unsigned i = 0; i < NumOps -1; ++i) { - SDValue op = N->getOperand(i); - AsmNodeOperands.push_back(op); - - if (i < InlineAsm::Op_FirstOperand) - continue; - - if (ConstantSDNode *C = dyn_cast(N->getOperand(i))) { - Flag = C->getZExtValue(); - Kind = InlineAsm::getKind(Flag); - } - else - continue; - - if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef - && Kind != InlineAsm::Kind_RegDefEarlyClobber) - continue; - - unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); - unsigned RC; - bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); - if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) - continue; - - assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); - SDValue V0 = N->getOperand(i+1); - SDValue V1 = N->getOperand(i+2); - unsigned Reg0 = cast(V0)->getReg(); - unsigned Reg1 = cast(V1)->getReg(); - SDValue PairedReg; - MachineRegisterInfo &MRI = MF->getRegInfo(); - - if (Kind == InlineAsm::Kind_RegDef || - Kind == InlineAsm::Kind_RegDefEarlyClobber) { - // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to - // the original GPRs. 
- - unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); - SDValue Chain = SDValue(N,0); - - SDNode *GU = N->getGluedUser(); - SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, - Chain.getValue(1)); - - // Extract values from a GPRPair reg and copy to the original GPR reg. - SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, - RegCopy); - SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, - RegCopy); - SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, - RegCopy.getValue(1)); - SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); - - // Update the original glue user. - std::vector Ops(GU->op_begin(), GU->op_end()-1); - Ops.push_back(T1.getValue(1)); - CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); - GU = T1.getNode(); - } - else { - // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a - // GPRPair and then pass the GPRPair to the inline asm. - SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; - - // As REG_SEQ doesn't take RegisterSDNode, we copy them first. - SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, - Chain.getValue(1)); - SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, - T0.getValue(1)); - SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); - - // Copy REG_SEQ into a GPRPair-typed VR and replace the original two - // i32 VRs of inline asm with it. - unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); - Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); - - AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; - Glue = Chain.getValue(1); - } - - Changed = true; - - if(PairedReg.getNode()) { - Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); - Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); - // Replace the current flag. - AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( - Flag, MVT::i32); - // Add the new register node and skip the original two GPRs. - AsmNodeOperands.push_back(PairedReg); - // Skip the next two GPRs. - i += 2; - } - } - - AsmNodeOperands.push_back(Glue); - if (!Changed) - return NULL; - - SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), - CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], - AsmNodeOperands.size()); - New->setNodeId(-1); - return New.getNode(); -} - - bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector &OutOps) { -- cgit v1.1 From 382a5530ec0682d8dd17f7d9212f52ace38460ed Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 14 Feb 2013 01:58:08 +0000 Subject: Don't assume that a static function in an extern "C" block will not be mangled. Since functions with internal linkage don't have language linkage, it is valid to overload them: extern "C" { static int foo(); static int foo(int); } So we mangle them.
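For instance, with Itanium-style mangling (the symbol names below are illustrative), the pair above comes out as two distinct local symbols, which also means hand-written assembly can no longer reach such a function by its plain C name:

    extern "C" {
      static int foo()      { return 0; }  // mangled, e.g. _ZL3foov
      static int foo(int x) { return x; }  // mangled, e.g. _ZL3fooi
    }
    int use() { return foo() + foo(42); }  // keeps both overloads referenced

That is why the JIT callback below loses its 'static': code that refers to it by the unmangled name PPCCompilationCallbackC, such as the assembly stubs, would otherwise fail to link.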
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175120 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 851de17..df40cf1 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -292,7 +292,7 @@ void PPC64CompilationCallback() { #endif extern "C" { -static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, +void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, unsigned *OrigCallAddrPlus4, bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub -- cgit v1.1 From ad236eb8c6fec966463fd7186b582284e1dcdb1b Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 14 Feb 2013 02:49:18 +0000 Subject: Don't assume the mangling of static functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0a4acce..2e003f7 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -342,12 +342,6 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -#if !(defined (X86_64_JIT) && defined(_MSC_VER)) - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly -static -#endif void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; -- cgit v1.1 From c5f147459b1704e25df4ff888729dd5d22d73381 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 14 Feb 2013 02:51:03 +0000 Subject: Fix some issues with rematerialization in RegisterCoalescer when the destination of the copy is a subregister def. The current code assumes that it can do a full def of the destination register, but it is not checking that the def operand is read-undef. It also doesn't clear the subregister index of the destination in the new instruction to reflect the full subregister def. These issues were found running 'make check' with my next commit that enables rematerialization in more cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegisterCoalescer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index e682d63..dfd9b86 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -755,6 +755,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; if (!DefMI->isImplicitDef()) { // Make sure the copy destination register class fits the instruction // definition register class. 
The mismatch can happen as a result of earlier @@ -773,6 +777,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); + // The original DefMI may have been a subregister def, but the full register + // class of its destination matches the destination of CopyMI, and CopyMI is + // either a full register def or is read-undef. Therefore we can clear the + // subregister index on the rematerialized instruction. + NewMI->getOperand(0).setSubReg(0); + // NewMI may have dead implicit defs (E.g. EFLAGS for MOVr0 on X86). // We need to remember these so we can add intervals once we insert // NewMI into SlotIndexes. -- cgit v1.1 From 1d4673228143965fafbcb679417f5a9be53d0a2a Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 14 Feb 2013 02:51:05 +0000 Subject: Fix RegisterCoalescer::rematerializeTrivialDef() so that it works on flipped CoalescerPairs. Also, make it take a CoalescerPair directly like other methods of RegisterCoalescer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegisterCoalescer.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index dfd9b86..07ae8a2 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -167,8 +167,7 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); /// canJoinPhys - Return true if a physreg copy should be joined. bool canJoinPhys(const CoalescerPair &CP); @@ -732,9 +731,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. -bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, - unsigned DstReg, +bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI) { + unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + LiveInterval &SrcInt = LIS->getInterval(SrcReg); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); @@ -1012,9 +1016,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), - CP.getDstReg(), CopyMI)) + if (reMaterializeTrivialDef(CP, CopyMI)) return true; return false; } @@ -1047,9 +1049,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. 
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
 return true;
 
 // If we can eliminate the copy without merging the live ranges, do so now.
-- cgit v1.1 

From 38f85c5b9f2e2a7e1364ce44b6b8cd1ec0ffb0b3 Mon Sep 17 00:00:00 2001
From: Reed Kotler
Date: Thu, 14 Feb 2013 03:05:25 +0000
Subject: Remove the form field from Mips16 instruction formats and set things
 up so that we can apply the direct object emitter patch.

This patch should be a nop right now and its test is to not break what is
already there.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175126 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 14 +++
 lib/Target/Mips/Mips16InstrFormats.td | 111 ++++++++--------------
 lib/Target/Mips/Mips16InstrInfo.td | 35 ++++---
 3 files changed, 73 insertions(+), 87 deletions(-)
(limited to 'lib')

diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 9560f3f..025a783 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -93,6 +93,11 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
 uint64_t Address,
 const void *Decoder);
 
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
 static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
 unsigned RegNo,
 uint64_t Address,
@@ -322,6 +327,15 @@ static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
 return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
}
 
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+
+ return MCDisassembler::Fail;
+
+}
+
 static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
 unsigned RegNo,
 uint64_t Address,
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index 61602b6..4ff62ef 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -29,45 +29,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-// -class Format16 val> { - bits<5> Value = val; -} - -def Pseudo16 : Format16<0>; -def FrmI16 : Format16<1>; -def FrmRI16 : Format16<2>; -def FrmRR16 : Format16<3>; -def FrmRRI16 : Format16<4>; -def FrmRRR16 : Format16<5>; -def FrmRRI_A16 : Format16<6>; -def FrmSHIFT16 : Format16<7>; -def FrmI8_TYPE16 : Format16<8>; -def FrmI8_MOVR3216 : Format16<9>; -def FrmI8_MOV32R16 : Format16<10>; -def FrmI8_SVRS16 : Format16<11>; -def FrmJAL16 : Format16<12>; -def FrmJALX16 : Format16<13>; -def FrmEXT_I16 : Format16<14>; -def FrmASMACRO16 : Format16<15>; -def FrmEXT_RI16 : Format16<16>; -def FrmEXT_RRI16 : Format16<17>; -def FrmEXT_RRI_A16 : Format16<18>; -def FrmEXT_SHIFT16 : Format16<19>; -def FrmEXT_I816 : Format16<20>; -def FrmEXT_I8_SVRS16 : Format16<21>; -def FrmOther16 : Format16<22>; // Instruction w/ a custom format // Base class for Mips 16 Format // This class does not depend on the instruction size // class MipsInst16_Base pattern, - InstrItinClass itin, Format16 f>: Instruction + InstrItinClass itin>: Instruction { - Format16 Form = f; let Namespace = "Mips"; @@ -78,14 +46,6 @@ class MipsInst16_Base pattern, let Pattern = pattern; let Itinerary = itin; - // - // Attributes specific to Mips instructions... - // - bits<5> FormBits = Form.Value; - - // TSFlags layout should be kept in sync with MipsInstrInfo.h. - let TSFlags{4-0} = FormBits; - let Predicates = [InMips16Mode]; } @@ -93,30 +53,35 @@ class MipsInst16_Base pattern, // Generic Mips 16 Format // class MipsInst16 pattern, - InstrItinClass itin, Format16 f>: - MipsInst16_Base + InstrItinClass itin>: + MipsInst16_Base { field bits<16> Inst; bits<5> Opcode = 0; // Top 5 bits are the 'opcode' field let Inst{15-11} = Opcode; + + let Size=2; + field bits<16> SoftFail = 0; } // // For 32 bit extended instruction forms. 
// class MipsInst16_32 pattern, - InstrItinClass itin, Format16 f>: - MipsInst16_Base + InstrItinClass itin>: + MipsInst16_Base { field bits<32> Inst; - + + let Size=4; + field bits<32> SoftFail = 0; } class MipsInst16_EXTEND pattern, - InstrItinClass itin, Format16 f>: - MipsInst16_32 + InstrItinClass itin>: + MipsInst16_32 { let Inst{31-27} = 0b11110; } @@ -125,7 +90,7 @@ class MipsInst16_EXTEND pattern, // Mips Pseudo Instructions Format class MipsPseudo16 pattern>: - MipsInst16 { + MipsInst16 { let isCodeGenOnly = 1; let isPseudo = 1; } @@ -137,7 +102,7 @@ class MipsPseudo16 pattern>: class FI16 op, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<11> imm11; @@ -152,7 +117,7 @@ class FI16 op, dag outs, dag ins, string asmstr, list pattern, class FRI16 op, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<8> imm8; @@ -169,7 +134,7 @@ class FRI16 op, dag outs, dag ins, string asmstr, class FRR16 _funct, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<3> ry; @@ -188,7 +153,7 @@ class FRR16 _funct, dag outs, dag ins, string asmstr, // class FRR_SF16 _funct, bits<3> _subfunct, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<3> subfunct; @@ -208,7 +173,7 @@ class FRR_SF16 _funct, bits<3> _subfunct, dag outs, dag ins, // class FC16 _funct, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<6> _code; // code is a keyword in tablegen bits<5> funct; @@ -226,7 +191,7 @@ class FC16 _funct, dag outs, dag ins, string asmstr, class FRR16_JALRC _nd, bits<1> _l, bits<1> r_a, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<1> nd; @@ -252,7 +217,7 @@ class FRR16_JALRC _nd, bits<1> _l, bits<1> r_a, class FRRI16 op, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<3> ry; @@ -272,7 +237,7 @@ class FRRI16 op, dag outs, dag ins, string asmstr, class FRRR16 _f, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<3> ry; @@ -294,7 +259,7 @@ class FRRR16 _f, dag outs, dag ins, string asmstr, class FRRI_A16 _f, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<3> ry; @@ -316,7 +281,7 @@ class FRRI_A16 _f, dag outs, dag ins, string asmstr, class FSHIFT16 _f, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> rx; bits<3> ry; @@ -338,7 +303,7 @@ class FSHIFT16 _f, dag outs, dag ins, string asmstr, class FI816 _func, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> func; bits<8> imm8; @@ -356,7 +321,7 @@ class FI816 _func, dag outs, dag ins, string asmstr, class FI8_MOVR3216 pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<4> ry; @@ -378,7 +343,7 @@ class FI8_MOVR3216 pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<3> func; @@ -402,7 +367,7 @@ class FI8_MOV32R16 _s, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16 + MipsInst16 { bits<1> s; bits<1> ra = 0; @@ -429,7 +394,7 @@ class FI8_SVRS16 _s, dag outs, dag ins, string asmstr, class FJAL16 _X, dag outs, dag ins, string asmstr, list 
pattern, InstrItinClass itin>: - MipsInst16_32 + MipsInst16_32 { bits<1> X; bits<26> imm26; @@ -452,7 +417,7 @@ class FJAL16 _X, dag outs, dag ins, string asmstr, class FEXT_I16 _eop, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<16> imm16; bits<5> eop; @@ -474,7 +439,7 @@ class FEXT_I16 _eop, dag outs, dag ins, string asmstr, class FASMACRO16 pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<3> select; bits<3> p4; @@ -503,7 +468,7 @@ class FASMACRO16 _op, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<16> imm16; bits<5> op; @@ -527,7 +492,7 @@ class FEXT_RI16 _op, dag outs, dag ins, string asmstr, class FEXT_RRI16 _op, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<5> op; bits<16> imm16; @@ -552,7 +517,7 @@ class FEXT_RRI16 _op, dag outs, dag ins, string asmstr, class FEXT_RRI_A16 _f, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<15> imm15; bits<3> rx; @@ -578,7 +543,7 @@ class FEXT_RRI_A16 _f, dag outs, dag ins, string asmstr, class FEXT_SHIFT16 _f, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<6> sa6; bits<3> rx; @@ -605,7 +570,7 @@ class FEXT_SHIFT16 _f, dag outs, dag ins, string asmstr, class FEXT_I816 _funct, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<16> imm16; bits<5> I8; @@ -630,7 +595,7 @@ class FEXT_I816 _funct, dag outs, dag ins, string asmstr, class FEXT_I8_SVRS16 s_, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst16_EXTEND + MipsInst16_EXTEND { bits<3> xsregs =0; bits<8> framesize =0; @@ -659,5 +624,3 @@ class FEXT_I8_SVRS16 s_, dag outs, dag ins, string asmstr, } - - diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 06344db..872de52 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -335,8 +335,7 @@ class FRRR16_ins _f, string asmstr, InstrItinClass itin> : class Sel f1, string op, InstrItinClass itin>: MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, CPU16Regs:$rt), - !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin, - Pseudo16> { + !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; } @@ -362,8 +361,7 @@ class SeliT f1, string op1, bits<5> f2, string op2, CPU16Regs:$rl, simm16:$imm), !strconcat(op2, !strconcat("\t$rl, $imm\n\t", - !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin, - Pseudo16> { + !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; } @@ -386,8 +384,7 @@ class SelT f1, string op1, bits<5> f2, string op2, CPU16Regs:$rl, CPU16Regs:$rr), !strconcat(op2, !strconcat("\t$rl, $rr\n\t", - !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin, - Pseudo16> { + !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; } @@ -448,7 +445,9 @@ def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>, let AddedComplexity = 5; } def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, - ArithLogic16Defs<0>; + ArithLogic16Defs<0> { + let isCodeGenOnly = 1; +} def AddiuRxRyOffMemX16: 
FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>; @@ -602,7 +601,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { let isBarrier=1; } -def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> { +def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> { let isBranch = 1; let isIndirectBranch = 1; let isTerminator=1; @@ -620,7 +619,9 @@ def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> { // Purpose: Load Byte (Extended) // To load a byte from memory as a signed value. // -def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad{ + let isCodeGenOnly = 1; +} // // Format: LBU ry, offset(rx) MIPS16e @@ -628,14 +629,18 @@ def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; // To load a byte from memory as a unsigned value. // def LbuRxRyOffMemX16: - FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad; + FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad { + let isCodeGenOnly = 1; +} // // Format: LH ry, offset(rx) MIPS16e // Purpose: Load Halfword signed (Extended) // To load a halfword from memory as a signed value. // -def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad{ + let isCodeGenOnly = 1; +} // // Format: LHU ry, offset(rx) MIPS16e @@ -643,7 +648,9 @@ def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; // To load a halfword from memory as an unsigned value. // def LhuRxRyOffMemX16: - FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad; + FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad { + let isCodeGenOnly = 1; +} // // Format: LI rx, immediate MIPS16e @@ -657,7 +664,9 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. // -def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{ + let isCodeGenOnly = 1; +} // Format: LW rx, offset(sp) MIPS16e // Purpose: Load Word (SP-Relative, Extended) -- cgit v1.1 From 9d90163bc31b71bb20b4423f3254544134d31ea2 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 14 Feb 2013 03:23:37 +0000 Subject: Teach the DataLayout aware constant folder to be much more aggressive towards 'and' instructions. This is a pattern that shows up a lot in ubsan binaries. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175128 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 26e3888..c6aff9d 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -536,10 +536,10 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, -/// otherwise TD is null. +/// these together. If target data info is available, it is provided as DL, +/// otherwise DL is null. 
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const DataLayout *TD){ + Constant *Op1, const DataLayout *DL){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -547,16 +547,38 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // bits. + if (Opc == Instruction::And && DL) { + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); + APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); + ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + // All the bits of Op0 that the 'and' could be masking are already zero. + return Op0; + } + if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + // All the bits of Op1 that the 'and' could be masking are already zero. + return Op1; + } + + APInt KnownZero = KnownZero0 | KnownZero1; + APInt KnownOne = KnownOne0 & KnownOne1; + if ((KnownZero | KnownOne).isAllOnesValue()) { + return ConstantInt::get(Op0->getType(), KnownOne); + } + } + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. - if (Opc == Instruction::Sub && TD) { + if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - unsigned PtrSize = TD->getPointerSizeInBits(); - unsigned OpSize = TD->getTypeSizeInBits(Op0->getType()); + unsigned PtrSize = DL->getPointerSizeInBits(); + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); - if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) - if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. // PtrToInt may change the bitwidth so we have convert to the right size -- cgit v1.1 From 1f258a6d53b29f6dfb60436da4aa76e996644d84 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 14 Feb 2013 03:25:24 +0000 Subject: RegisterCoalescer::reMaterializeTrivialDef() can constrain the destination register class to match the defining instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegisterCoalescer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 07ae8a2..e2488ad 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -769,7 +769,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, // extract_subreg, insert_subreg, subreg_to_reg coalescing. const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (MRI->getRegClass(DstReg) != RC) + if (!MRI->constrainRegClass(DstReg, RC)) return false; } else if (!RC->contains(DstReg)) return false; -- cgit v1.1 From bf7ac42663e087b2effc6b9428eddab3b2475073 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 14 Feb 2013 03:33:34 +0000 Subject: Revert r175120 and r175121. Clang is producing the expected asm names again. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175133 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 2 +- lib/Target/X86/X86JITInfo.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index df40cf1..851de17 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -292,7 +292,7 @@ void PPC64CompilationCallback() { #endif extern "C" { -void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, +static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, unsigned *OrigCallAddrPlus4, bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 2e003f7..0a4acce 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -342,6 +342,12 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { +#if !(defined (X86_64_JIT) && defined(_MSC_VER)) + // the following function is called only from this translation unit, + // unless we are under 64bit Windows with MSC, where there is + // no support for inline assembly +static +#endif void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; -- cgit v1.1 From c556fcc153a727945dbbe222a5b7c1dfce141a33 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Thu, 14 Feb 2013 08:00:33 +0000 Subject: R600/SI: Check for empty stack in SIAnnotateControlFlow::isTopOfStack Fixes assertion failure in newly added lit test. Might just be a bandaid that needs to be revisited. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175139 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIAnnotateControlFlow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index f580377..2477e2a 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -147,7 +147,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) { /// \brief Is BB the last block saved on the stack ? bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) { - return Stack.back().first == BB; + return !Stack.empty() && Stack.back().first == BB; } /// \brief Pop the last saved value from the control flow stack -- cgit v1.1 From 60b3e18d674489364b6448d376ff6666f106c1fc Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 14 Feb 2013 08:20:26 +0000 Subject: Fixed a bug in X86TargetLowering::LowerVectorIntExtend() (assertion failure). Added a test. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 55de3d3..dbc0e01 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6662,9 +6662,10 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } + LLVMContext *Context = DAG.getContext(); unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift; - EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift); + EVT NeVT = EVT::getIntegerVT(*Context, NBits); + EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift); if (!isTypeLegal(NVT)) return SDValue(); @@ -6683,8 +6684,21 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // If it's foldable, i.e. normal load with single use, we will let code // selection to fold it. Otherwise, we will short the conversion sequence. if (CIdx && CIdx->getZExtValue() == 0 && - (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) + (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) { + if (V.getValueSizeInBits() > V1.getValueSizeInBits()) { + // The "ext_vec_elt" node is wider than the result node. + // In this case we should extract subvector from V. + // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)). + unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits(); + EVT FullVT = V.getValueType(); + EVT SubVecVT = EVT::getVectorVT(*Context, + FullVT.getVectorElementType(), + FullVT.getVectorNumElements()/Ratio); + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V, + DAG.getIntPtrConstant(0)); + } V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); + } } return DAG.getNode(ISD::BITCAST, DL, VT, -- cgit v1.1 From 39125d9a3a319b6dfd1fba639cd7df2fa1465a8e Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 14 Feb 2013 09:07:33 +0000 Subject: Add some legality checks for SETCC before introducing it in the DAG combiner post-operand legalization. 
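Schematically, the added guard has the following shape, condensed from the hunks below (a sketch, not a drop-in replacement):

// Only fold a pair of setccs joined by and/or into one SETCC when, under
// LegalOperations, both the combined condition code and a SETCC of the
// required result type are legal for the target.
if (Result != ISD::SETCC_INVALID &&
    (!LegalOperations ||
     (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
      TLI.isOperationLegal(ISD::SETCC,
                           TLI.getSetCCResultType(N0.getValueType())))))
  return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result);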
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175149 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 67fa39d..c61d7ac 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2634,7 +2634,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && (!LegalOperations || - TLI.isCondCodeLegal(Result, LL.getSimpleValueType()))) + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getSimpleValueType()))))) return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result); } @@ -3144,7 +3146,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && (!LegalOperations || - TLI.isCondCodeLegal(Result, LL.getSimpleValueType()))) + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getValueType()))))) return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result); } -- cgit v1.1 From 79e8429e41c98d42e9e4d13de8276b8de24ddc83 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Thu, 14 Feb 2013 12:18:32 +0000 Subject: Workaround an MSan false positive. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Process.inc | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 1335b78..9a4454f 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -224,6 +224,8 @@ static unsigned getColumns(int FileID) { #if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) // Try to determine the width of the terminal. struct winsize ws; + // Zero-fill ws to avoid a false positive from MemorySanitizer. + memset(&ws, 0, sizeof(ws)); if (ioctl(FileID, TIOCGWINSZ, &ws) == 0) Columns = ws.ws_col; #endif -- cgit v1.1 From b1d081230e40e5c86f3cc44a7cfd7241732eabfb Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Thu, 14 Feb 2013 14:46:12 +0000 Subject: Make ARMAsmParser accept the correct alignment specifier syntax in instructions. The parser will now accept instructions with alignment specifiers written like vld1.8 {d16}, [r0:64] , while also still accepting the incorrect syntax vld1.8 {d16}, [r0, :64] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3174e9a..8d6cf3c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4237,9 +4237,10 @@ parseMemory(SmallVectorImpl &Operands) { if (BaseRegNum == -1) return Error(BaseRegTok.getLoc(), "register expected"); - // The next token must either be a comma or a closing bracket. + // The next token must either be a comma, a colon or a closing bracket. 
const AsmToken &Tok = Parser.getTok(); - if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac)) + if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) && + !Tok.is(AsmToken::RBrac)) return Error(Tok.getLoc(), "malformed memory operand"); if (Tok.is(AsmToken::RBrac)) { @@ -4259,8 +4260,11 @@ parseMemory(SmallVectorImpl &Operands) { return false; } - assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!"); - Parser.Lex(); // Eat the comma. + assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) && + "Lost colon or comma in memory operand?!"); + if (Tok.is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + } // If we have a ':', it's an alignment specifier. if (Parser.getTok().is(AsmToken::Colon)) { -- cgit v1.1 From 5bd6cb2dabf3fea9cb9fa0b275fbc7ceb85ba970 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 14 Feb 2013 16:17:01 +0000 Subject: AArch64: add block comments where missing Only comments affected. No code change at all. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64.td | 2 ++ lib/Target/AArch64/AArch64AsmPrinter.h | 2 +- lib/Target/AArch64/AArch64FrameLowering.h | 3 ++- lib/Target/AArch64/AArch64InstrFormats.td | 7 +++++-- lib/Target/AArch64/AArch64InstrInfo.td | 13 +++++++++++++ lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 6 +++++- lib/Target/AArch64/AArch64RegisterInfo.h | 2 +- lib/Target/AArch64/AArch64RegisterInfo.td | 8 ++++---- lib/Target/AArch64/AArch64TargetMachine.cpp | 3 +++ lib/Target/AArch64/AArch64TargetObjectFile.cpp | 5 +++++ lib/Target/AArch64/AArch64TargetObjectFile.h | 4 ++++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 5 +++++ lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 8 +++++++- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 5 +++++ lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 5 +++++ lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 5 +++++ lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 5 +++++ lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 5 +++++ lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp | 4 ++++ 19 files changed, 86 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 0e4f5fb..e17052b 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -6,7 +6,9 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// // This is the top level entry point for the AArch64 target. +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h index 492be66..b6f9ee6 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.h +++ b/lib/Target/AArch64/AArch64AsmPrinter.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// AArch64 Assembly printer class. +// This file defines the AArch64 assembly printer class. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index a14c2bb..bca7b06 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// +// This class implements the AArch64-specific parts of the TargetFrameLowering +// class. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index c6aa265..8cb770e 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -6,11 +6,14 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// This file describes AArch64 instruction formats, down to the level of the +// instruction's overall class. +// ===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// -// // A64 Instruction Format Definitions. -// +//===----------------------------------------------------------------------===// // A64 is currently the only instruction set supported by the AArch64 // architecture. diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index adcab89..d514364 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -1,3 +1,16 @@ +//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the AArch64 scalar instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + include "AArch64InstrFormats.td" //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index 012a4f8..f45d8f7 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -6,7 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// - +// +// This file just contains the anchor for the AArch64MachineFunctionInfo to +// force vtable emission. +// +//===----------------------------------------------------------------------===// #include "AArch64MachineFunctionInfo.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 3be083d..a25f9d2 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the AArch64 implementation of the MRegisterInfo class. +// This file contains the AArch64 implementation of the MCRegisterInfo class. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 3cbbf14..bd79546 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// +//===- AArch64RegisterInfo.td - ARM Register defs ----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -6,9 +6,9 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the ARM register file +// +// This file contains declarations that describe the AArch64 register file +// //===----------------------------------------------------------------------===// let Namespace = "AArch64" in { diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 68e3643..5a18338 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -7,6 +7,9 @@ // //===----------------------------------------------------------------------===// // +// This file contains the implementation of the AArch64TargetMachine +// methods. Principally just setting up the passes needed to generate correct +// code on this architecture. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index d5c3e89..b4452f5 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file deals with any AArch64 specific requirements on object files. +// +//===----------------------------------------------------------------------===// + #include "AArch64TargetObjectFile.h" diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 07caac1..bf0565a 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -6,6 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file deals with any AArch64 specific requirements on object files. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H #define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1cdeafb..f871ecf 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file contains the (GNU-style) assembly parser for the AArch64 +// architecture. 
+// +//===----------------------------------------------------------------------===// #include "MCTargetDesc/AArch64MCTargetDesc.h" diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 38d0e8e..eba7666 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1,4 +1,4 @@ -//===- AArch64Disassembler.cpp - Disassembler for AArch64/Thumb ISA -------===// +//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file contains the functions necessary to decode AArch64 instruction +// bitpatterns into MCInsts (with the help of TableGenerated information from +// the instruction definitions). +// +//===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-disassembler" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 5d5e38e..a3373b1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the MCAsmBackend class, +// which is principally concerned with relaxation of the various fixup kinds. +// +//===----------------------------------------------------------------------===// #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 476b94e..4bcc65d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file handles ELF-specific object emission, converting LLVM's internal +// fixups into the appropriate relocations. +// +//===----------------------------------------------------------------------===// #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h index 15e0886..eeb122d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file describes the LLVM fixups applied to MCInsts in the AArch64 +// backend. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H #define LLVM_AARCH64_AARCH64FIXUPKINDS_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index e86e04a..c1abfe7 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -6,6 +6,11 @@ // License. 
See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). +// +//===----------------------------------------------------------------------===// #define DEBUG_TYPE "aarch64mcexpr" #include "AArch64MCExpr.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index f2af204..c0e3b29 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file describes AArch64-specific MCExprs, used for modifiers like +// ":lo12:" or ":gottprel_g1:". +// +//===----------------------------------------------------------------------===// #ifndef LLVM_AARCH64MCEXPR_H #define LLVM_AARCH64MCEXPR_H diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index 5dbdc57..b8099cb 100644 --- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -6,6 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This file contains the key registration step for the architecture. +// +//===----------------------------------------------------------------------===// #include "AArch64.h" #include "llvm/IR/Module.h" -- cgit v1.1 From 5464c301c4472f54f700e171750fc51d39a0f4b8 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 14 Feb 2013 16:22:14 +0000 Subject: AArch64: stop claiming that NEON registers are usable for now. If vector types have legal register classes, then LLVM bypasses LegalizeTypes on them, which causes faults currently since the code to handle them isn't in place. This fixes test failures when AArch64 is the default target. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175172 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index ff28dc1..2c11547 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -57,17 +57,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); - // And the vectors - addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); - computeRegisterProperties(); // Some atomic operations can be folded into load-acquire or store-release -- cgit v1.1 From e186d7191c2cf95753a9790b1490df8a07416daa Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 14 Feb 2013 16:23:08 +0000 Subject: Revert r15266. This fixes llvm.org/pr15266. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175173 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 59 ++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 8fcc264..530e94e 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -179,48 +179,26 @@ static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { } } bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; - bool isBinary = LookAhead[-1] == 'b' || LookAhead[-1] == 'B'; - CurPtr = (isBinary || isHex || !FirstHex) ? LookAhead : FirstHex; + CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; if (isHex) return 16; - if (isBinary) { - --CurPtr; - return 2; - } return DefaultRadix; } /// LexDigit: First character is [0-9]. /// Local Label: [0-9][:] -/// Forward/Backward Label: [0-9]+f or [0-9]b -/// Binary integer: 0b[01]+ or [01][bB] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ /// Octal integer: 0[0-7]+ /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { - - // Backward Label: [0-9]b - if (*CurPtr == 'b') { - // See if we actually have "0b" as part of something like "jmp 0b\n" - if (!isdigit(CurPtr[1])) { - long long Value; - StringRef Result(TokStart, CurPtr - TokStart); - if (Result.getAsInteger(10, Value)) - return ReturnError(TokStart, "invalid backward label"); - - return AsmToken(AsmToken::Integer, Result, Value); - } - } - - // Binary integer: 1[01]*[bB] // Decimal integer: [1-9][0-9]* - // Hexidecimal integer: [1-9][0-9a-fA-F]*[hH] if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doLookAhead(CurPtr, 10); - bool isDecimal = Radix == 10; - + bool isHex = Radix == 16; // Check for floating point literals. - if (isDecimal && (*CurPtr == '.' || *CurPtr == 'e')) { + if (!isHex && (*CurPtr == '.' 
|| *CurPtr == 'e')) { ++CurPtr; return LexFloatLiteral(); } @@ -233,7 +211,7 @@ AsmToken AsmLexer::LexDigit() { // integer, but that do fit in an unsigned one, we just convert them over. unsigned long long UValue; if (Result.getAsInteger(Radix, UValue)) - return ReturnError(TokStart, isDecimal ? "invalid decimal number" : + return ReturnError(TokStart, !isHex ? "invalid decimal number" : "invalid hexdecimal number"); Value = (long long)UValue; } @@ -249,9 +227,15 @@ AsmToken AsmLexer::LexDigit() { return AsmToken(AsmToken::Integer, Result, Value); } - // Binary integer: 0b[01]+ if (*CurPtr == 'b') { - const char *NumStart = ++CurPtr; + ++CurPtr; + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[0])) { + --CurPtr; + StringRef Result(TokStart, CurPtr - TokStart); + return AsmToken(AsmToken::Integer, Result, 0); + } + const char *NumStart = CurPtr; while (CurPtr[0] == '0' || CurPtr[0] == '1') ++CurPtr; @@ -272,7 +256,6 @@ AsmToken AsmLexer::LexDigit() { return AsmToken(AsmToken::Integer, Result, Value); } - // Hex integer: 0x[0-9a-fA-F]+ if (*CurPtr == 'x') { ++CurPtr; const char *NumStart = CurPtr; @@ -299,21 +282,17 @@ AsmToken AsmLexer::LexDigit() { (int64_t)Result); } - // Binary: 0[01]*[Bb], but not 0b. - // Octal: 0[0-7]* - // Hexidecimal: [0][0-9a-fA-F]*[hH] + // Either octal or hexidecimal. long long Value; unsigned Radix = doLookAhead(CurPtr, 8); - bool isBinary = Radix == 2; - bool isOctal = Radix == 8; + bool isHex = Radix == 16; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, isOctal ? "invalid octal number" : - isBinary ? "invalid binary number" : + return ReturnError(TokStart, !isHex ? "invalid octal number" : "invalid hexdecimal number"); - // Consume the [bB][hH]. - if (Radix == 2 || Radix == 16) + // Consume the [hH]. + if (Radix == 16) ++CurPtr; // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL -- cgit v1.1 From ad1b9dcb2c9f1a1a031ff5d026bd988999a54c4b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 14 Feb 2013 16:31:12 +0000 Subject: AArch64: switch from neverHasSideEffects to hasSideEffects. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175176 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrInfo.td | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
(limited to 'lib')

diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index d514364..bdef183 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1651,7 +1651,7 @@ class A64I_dp_1src_impl opcode, string asmop,
 itin>;
 
 multiclass A64I_dp_1src opcode, string asmop> {
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
 def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
 def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
 }
@@ -3907,7 +3907,7 @@ multiclass A64I_movwSizes opc, string asmop, dag ins32bit,
 }
 
 let isMoveImm = 1, isReMaterializable = 1,
- isAsCheapAsAMove = 1, neverHasSideEffects = 1 in {
+ isAsCheapAsAMove = 1, hasSideEffects = 0 in {
 defm MOVN : A64I_movwSizes<0b00, "movn",
 (ins movn32_imm:$FullImm),
 (ins movn64_imm:$FullImm)>;
@@ -3993,7 +3993,7 @@ def adrp_label : Operand {
 let OperandType = "OPERAND_PCREL";
 }
 
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
 def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
 "adr\t$Rd, $Label", [], NoItinerary>;
 
@@ -4576,7 +4576,7 @@ def cpinst_operand : Operand;
 
 def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid,
 cpinst_operand:$cpidx, i32imm:$size), []> {
- let neverHasSideEffects = 1;
+ let hasSideEffects = 0;
 let isNotDuplicable = 1;
 }
 
-- cgit v1.1 

From df65b0fb51d57a7e8dfcd19557b1d00c11c9fe2a Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Thu, 14 Feb 2013 16:55:01 +0000
Subject: R600: Do not fold modifier/literals in vector inst

This fixes a couple of regressions on (probably not just) Cayman.

NOTE: This is a candidate for the Mesa stable branch.
Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175180 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index a88e8c7..b125ba8 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -218,7 +218,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { continue; } } else { - if (!TII->isALUInstr(Use->getMachineOpcode())) { + if (!TII->isALUInstr(Use->getMachineOpcode()) || + (TII->get(Use->getMachineOpcode()).TSFlags & + R600_InstFlag::VECTOR)) { continue; } @@ -261,7 +263,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { const R600InstrInfo *TII = static_cast(TM.getInstrInfo()); - if (Result && Result->isMachineOpcode() + if (Result && Result->isMachineOpcode() && + !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) && TII->isALUInstr(Result->getMachineOpcode())) { // Fold FNEG/FABS/CONST_ADDRESS // TODO: Isel can generate multiple MachineInst, we need to recursively -- cgit v1.1 From abfd5f6154b10cc5801bc9e1b8e8221df0113c68 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 14 Feb 2013 16:55:06 +0000 Subject: R600: Fold zero/one in export instructions Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175181 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 112 ++++++++++++++++------------------- lib/Target/R600/R600Instructions.td | 20 ++----- lib/Target/R600/R600Intrinsics.td | 3 - 3 files changed, 55 insertions(+), 80 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 8f4ec94..a7796b6 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -303,57 +303,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( using namespace llvm::Intrinsic; using namespace llvm::AMDGPUIntrinsic; -static SDValue -InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, - unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, - SDValue Scalar, SDValue Chain) { - if (!ExportMap[Slot]) { - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, MVT::v4f32, - DAG.getUNDEF(MVT::v4f32), - Scalar, - DAG.getConstant(Channel, MVT::i32)); - - unsigned Mask = 1 << Channel; - - const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), - DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), - DAG.getConstant(Mask, MVT::i32)}; - - SDValue Res = DAG.getNode( - AMDGPUISD::EXPORT, - DL, - MVT::Other, - Ops, 6); - ExportMap[Slot] = Res.getNode(); - return Res; - } - - SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; - SDValue PreviousVector = ExportInstruction->getOperand(1); - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, MVT::v4f32, - PreviousVector, - Scalar, - DAG.getConstant(Channel, MVT::i32)); - - unsigned Mask = dyn_cast(ExportInstruction->getOperand(5)) - ->getZExtValue(); - Mask |= (1 << Channel); - - const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, - DAG.getConstant(Inst, MVT::i32), - DAG.getConstant(Type, MVT::i32), - DAG.getConstant(Slot, MVT::i32), - DAG.getConstant(Mask, MVT::i32)}; - - DAG.UpdateNodeOperands(ExportInstruction, - Ops, 6); - - return Chain; - 
-} - SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); @@ -379,16 +328,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const MFI->LiveOuts.push_back(Reg); return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); } - case AMDGPUIntrinsic::R600_store_pixel_color: { - MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo(); - int64_t RegIndex = cast(Op.getOperand(3))->getZExtValue(); - - SDNode **OutputsMap = MFI->Outputs; - return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), - Chain); - + case AMDGPUIntrinsic::R600_store_swizzle: { + const SDValue Args[8] = { + Chain, + Op.getOperand(2), // Export Value + Op.getOperand(3), // ArrayBase + Op.getOperand(4), // Type + DAG.getConstant(0, MVT::i32), // SWZ_X + DAG.getConstant(1, MVT::i32), // SWZ_Y + DAG.getConstant(2, MVT::i32), // SWZ_Z + DAG.getConstant(3, MVT::i32) // SWZ_W + }; + return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(), + Args, 8); } // default for switch(IntrinsicID) @@ -1195,7 +1147,43 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, LHS.getOperand(2), LHS.getOperand(3), CCOpcode); - + } + case AMDGPUISD::EXPORT: { + SDValue Arg = N->getOperand(1); + if (Arg.getOpcode() != ISD::BUILD_VECTOR) + break; + SDValue NewBldVec[4] = { + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32) + }; + SDValue NewArgs[8] = { + N->getOperand(0), // Chain + SDValue(), + N->getOperand(2), // ArrayBase + N->getOperand(3), // Type + N->getOperand(4), // SWZ_X + N->getOperand(5), // SWZ_Y + N->getOperand(6), // SWZ_Z + N->getOperand(7) // SWZ_W + }; + for (unsigned i = 0; i < Arg.getNumOperands(); i++) { + if (ConstantFPSDNode *C = dyn_cast(Arg.getOperand(i))) { + if (C->isZero()) { + NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0 + } else if (C->isExactlyValue(1.0)) { + NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1 + } else { + NewBldVec[i] = Arg.getOperand(i); + } + } else { + NewBldVec[i] = Arg.getOperand(i); + } + } + DebugLoc DL = N->getDebugLoc(); + NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4); + return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8); } } return SDValue(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c9885a3..286ec9b 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -540,7 +540,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, [SDNPHasChain, SDNPSideEffect]>; @@ -612,22 +612,12 @@ multiclass ExportPattern cf_inst> { (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) - >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), - (ExportInst R600_Reg128:$src, imm:$type, 
imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$base, + imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) >; - def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase, - imm:$type), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) - >; } multiclass SteamOutputExportPattern; def int_R600_store_swizzle : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_R600_store_stream_output : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_pixel_color : - Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_R600_store_pixel_depth : Intrinsic<[], [llvm_float_ty], []>; def int_R600_store_pixel_stencil : -- cgit v1.1 From f846add9adf0752e552cb98fd0ba5dae791e4c3b Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 14 Feb 2013 16:55:11 +0000 Subject: R600: Export instructions are no longer terminators This allows the MachineInstScheduler to reorder them, and thus makes scheduling more efficient. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175182 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 286ec9b..e495bea 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -644,7 +644,7 @@ multiclass SteamOutputExportPattern; } -let isTerminator = 1, usesCustomInserter = 1 in { +let usesCustomInserter = 1 in { class ExportSwzInst : InstR600ISA<( outs), @@ -658,7 +658,7 @@ class ExportSwzInst : InstR600ISA<( let Inst{63-32} = Word1; } -} // End isTerminator = 1, usesCustomInserter = 1 +} // End usesCustomInserter = 1 class ExportBufInst : InstR600ISA<( outs), -- cgit v1.1 From 786788573729899a236851320c5680da8c161ec1 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 14 Feb 2013 16:57:19 +0000 Subject: R600: Do not fold single instruction with more than 3 kcache reads It fixes around 100 tfb piglit tests and 16 glean tests. NOTE: This is a candidate for the Mesa stable branch. 
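As a concrete illustration of the constraint, a minimal hedged sketch of the folding rule this patch enforces; the helper function is purely illustrative (the actual change is the two-line guard in AMDGPUDAGToDAGISel::FoldOperands shown in the diff that follows), and treating src2 as the cut-off point is taken from that guard:

    // Sketch: a CONST_ADDRESS node may be folded into src0 or src1 of an
    // ALU instruction, but not into src2, so that no single instruction
    // ends up with more kcache (constant cache) reads than it can issue.
    static bool canFoldConstIntoSrc(unsigned SrcIdx) {
      return SrcIdx < 2; // assumption: a third folded constant exceeds the limit
    }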
Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175183 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 2 ++ lib/Target/R600/R600LowerConstCopy.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index b125ba8..2e726e9 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -334,6 +334,8 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { + if (i == 2) + break; SDValue CstOffset; if (!Operand.getValueType().isVector() && SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp index 46f2aef..3ebe653 100644 --- a/lib/Target/R600/R600LowerConstCopy.cpp +++ b/lib/Target/R600/R600LowerConstCopy.cpp @@ -180,7 +180,7 @@ bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { int ConstMovSel = TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL); unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm(); - if (canFoldInBundle(CP, ConstIndex)) { + if (MI->isInsideBundle() && canFoldInBundle(CP, ConstIndex)) { TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex); MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST); } else { -- cgit v1.1 From 7248451c4307c05cf3ddfa8133f0c5334bab6455 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Thu, 14 Feb 2013 18:10:21 +0000 Subject: Re-apply r175088 for bug fix 13622: Add paired register support for inline asm with 64-bit data on ARM. Update the test case to use -mtriple=arm-linux-gnueabi git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 15 ++-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 141 +++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 986dfb7..58c7798 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -342,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); + if(ARM::GPRPairRegClass.contains(Reg)) { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + Reg = TRI->getSubReg(Reg, ARM::gsub_0); + } O << ARMInstPrinter::getRegisterName(Reg); break; } @@ -530,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterClass &RC = ARM::GPRRegClass; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - - unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); - RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. 
- - unsigned Reg = RC.getRegister(RegIdx); + unsigned Reg = MO.getReg(); + if(!ARM::GPRPairRegClass.contains(Reg)) + return false; + Reg = TRI->getSubReg(Reg, ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); return false; } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 939bed7..a83f052 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/CallingConv.h" @@ -257,6 +258,8 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -2552,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case ISD::XOR: { // Select special operations if XOR node forms integer ABS pattern SDNode *ResNode = SelectABSOp(N); @@ -3446,6 +3455,138 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + ExternalSymbolSDNode *S = dyn_cast( + N->getOperand(InlineAsm::Op_AsmString)); + StringRef AsmString = StringRef(S->getSymbol()); + + // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. + // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require + // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs + // respectively. Since there is no constraint to explicitly specify a + // reg pair, we search %H operand inside the asm string. If it is found, the + // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. + if (AsmString.find(":H}") == StringRef::npos) + return NULL; + + DebugLoc dl = N->getDebugLoc(); + SDValue Glue = N->getOperand(NumOps-1); + + // Glue node will be appended late. + for(unsigned i = 0; i < NumOps -1; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + continue; + + assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast(V0)->getReg(); + unsigned Reg1 = cast(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. 
+ + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + GU = T1.getNode(); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. + i += 2; + } + } + + AsmNodeOperands.push_back(Glue); + if (!Changed) + return NULL; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], + AsmNodeOperands.size()); + New->setNodeId(-1); + return New.getNode(); +} + + bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector &OutOps) { -- cgit v1.1 From 5e3100afef1a1b4d909dc003cfeff9f8b3838082 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Thu, 14 Feb 2013 18:15:29 +0000 Subject: Hexagon: Use multiclass for absolute addressing mode loads. This patch doesn't introduce any functionality changes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175187 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfoV4.td | 109 ++++++++++--------------------- 1 file changed, 35 insertions(+), 74 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 169660d..e76d716 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3726,89 +3726,50 @@ def : Pat<(store (i64 DoubleRegs:$src1), (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>; } -multiclass LD_abs { - let isPredicable = 1 in - def _abs_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$absaddr), - !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")), - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in - def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if ($src1) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), +multiclass LD_Abs_Predbase { + let PNewValue = !if(isPredNew, "new", "") in + def NAME : LDInst2<(outs RC:$dst), + (ins PredRegs:$src1, globaladdressExt:$absaddr), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"(##$absaddr)", []>, Requires<[HasV4T]>; +} - let isPredicated = 1 in - def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if (!$src1) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), - []>, - Requires<[HasV4T]>; +multiclass LD_Abs_Pred { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : LD_Abs_Predbase; + // Predicate new + defm _cdn#NAME : LD_Abs_Predbase; + } +} - let isPredicated = 1 in - def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if ($src1.new) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), +let isExtended = 1, neverHasSideEffects = 1 in +multiclass LD_Abs { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 1, isPredicable = 1 in + def NAME#_V4 : LDInst2<(outs RC:$dst), + (ins globaladdressExt:$absaddr), + "$dst = "#mnemonic#"(##$absaddr)", []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if (!$src1.new) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), - []>, - Requires<[HasV4T]>; + let opExtendable = 2, isPredicated = 1 in { + defm Pt_V4 : LD_Abs_Pred; + defm NotPt_V4 : LD_Abs_Pred; + } + } } -let AddedComplexity = 30 in -def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst), - (ins globaladdress:$absaddr), - "$dst = memd(##$absaddr)", - [(set (i64 DoubleRegs:$dst), - (load (HexagonCONST32 tglobaladdr:$absaddr)))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if ($src1) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if (!$src1) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if ($src1.new) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def 
LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if (!$src1.new) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -defm LDrib : LD_abs<"memb">; -defm LDriub : LD_abs<"memub">; -defm LDrih : LD_abs<"memh">; -defm LDriuh : LD_abs<"memuh">; -defm LDriw : LD_abs<"memw">; - +let addrMode = Absolute in { + defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel; + defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel; + defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel; + defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel; + defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel; + defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel; +} let Predicates = [HasV4T], AddedComplexity = 30 in def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), -- cgit v1.1 From b39a5529496a9824831c78e9e7249c062a4b754a Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 14 Feb 2013 18:20:48 +0000 Subject: 80-col git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dbc0e01..02ac8bf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17430,7 +17430,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, if (In.getOpcode() != X86ISD::VZEXT) return SDValue(); - return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0)); + return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), + In.getOperand(0)); } SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, -- cgit v1.1 From 6cc4b8d1eac5b8611f3a9e2c270c2e794d37e1f5 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 14 Feb 2013 18:28:52 +0000 Subject: Don't merge consecutive loads/stores into vectors when noimplicitfloat is used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175190 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c61d7ac..9d40ff7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7702,6 +7702,8 @@ struct ConsecutiveMemoryChainSorter { bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; + bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) @@ -7877,16 +7879,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // We only use vectors if the constant is known to be zero and the // function is not marked with the noimplicitfloat attribute. - if (NonZero || (DAG.getMachineFunction().getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat))) + if (NonZero || NoVectors) LastLegalVectorType = 0; // Check if we found a legal integer type to store. 
if (LastLegalType == 0 && LastLegalVectorType == 0) return false; - bool UseVector = LastLegalVectorType > LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; // Make sure we have something to merge. @@ -8039,7 +8039,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // All loads must share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; - + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; @@ -8059,7 +8059,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Only use vector types if the vector type is larger than the integer type. // If they are the same, use integers. - bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); // We add +1 here because the LastXXX variables refer to location while -- cgit v1.1 From 01115b1f5032b848659669b161af1bdd9e646208 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Thu, 14 Feb 2013 19:03:25 +0000 Subject: R600/SI: Fix int_SI_fs_interp_constant The important fix is that the constant interpolation value is stored in the parameter slot P0, which is encoded as 2. In addition, drop the SI_INTERP_CONST pseudo instruction, pass the parameter slot as an operand to V_INTERP_MOV_F32 instead of hardcoding it there, and add a special operand class for the parameter slots for type checking and pretty printing. NOTE: This is a candidate for the Mesa stable branch. 
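For reference, the slot encodings involved come from the InterpSlots class added in the diff that follows (P10 = 0, P20 = 1, P0 = 2). The standalone helper below is only a sketch of the printer logic, not code from the patch:

    #include "llvm/Support/ErrorHandling.h"

    // Decode an interpolation parameter slot operand; constant-interpolated
    // attributes live in slot P0, which is why its encoding must be 2.
    static const char *interpSlotName(unsigned Imm) {
      switch (Imm) {
      case 0: return "P10";
      case 1: return "P20";
      case 2: return "P0";
      default: llvm_unreachable("Invalid interpolation parameter slot");
      }
    }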
Reviewed-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175193 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 15 ++++++++++++ lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 1 + lib/Target/R600/SIISelLowering.cpp | 24 ------------------- lib/Target/R600/SIISelLowering.h | 2 -- lib/Target/R600/SIInstructions.td | 29 ++++++++++++++--------- 5 files changed, 34 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index fb17ab7..d6450a0 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -40,6 +40,21 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + + if (Imm == 2) { + O << "P0"; + } else if (Imm == 1) { + O << "P20"; + } else if (Imm == 0) { + O << "P10"; + } else { + assert(!"Invalid interpolation parameter slot"); + } +} + void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printOperand(MI, OpNo, O); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index e775c4c..767a708 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -33,6 +33,7 @@ public: private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm); void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index ceab692..3919b97 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -120,9 +120,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::SI_INTERP: LowerSI_INTERP(MI, *BB, I, MRI); break; - case AMDGPU::SI_INTERP_CONST: - LowerSI_INTERP_CONST(MI, *BB, I, MRI); - break; case AMDGPU::SI_WQM: LowerSI_WQM(MI, *BB, I, MRI); break; @@ -172,27 +169,6 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MI->eraseFromParent(); } -void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, - MachineBasicBlock &BB, MachineBasicBlock::iterator I, - MachineRegisterInfo &MRI) const { - MachineOperand dst = MI->getOperand(0); - MachineOperand attr_chan = MI->getOperand(1); - MachineOperand attr = MI->getOperand(2); - MachineOperand params = MI->getOperand(3); - unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) - .addOperand(params); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) - .addOperand(dst) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - MI->eraseFromParent(); -} - void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 
8528c24..f4bc94d 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -27,8 +27,6 @@ class SITargetLowering : public AMDGPUTargetLowering { MachineBasicBlock::iterator I, unsigned Opocde) const; void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a09f243..b1533bd 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -11,6 +11,17 @@ // that are not yet supported remain commented out. //===----------------------------------------------------------------------===// +class InterpSlots { +int P0 = 2; +int P10 = 0; +int P20 = 1; +} +def INTERP : InterpSlots; + +def InterpSlot : Operand { + let PrintMethod = "printInterpSlot"; +} + def isSI : Predicate<"Subtarget.device()" "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; @@ -681,10 +692,9 @@ def V_INTERP_P2_F32 : VINTRP < def V_INTERP_MOV_F32 : VINTRP < 0x00000002, (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_MOV_F32", + (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr", []> { - let VSRC = 0; let DisableEncoding = "$m0"; } @@ -1079,14 +1089,6 @@ def SI_INTERP : InstSI < [] >; -def SI_INTERP_CONST : InstSI < - (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), - "SI_INTERP_CONST $dst, $attr_chan, $attr, $params", - [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan, - imm:$attr, SReg_32:$params))] ->; - def SI_WQM : InstSI < (outs), (ins), @@ -1322,6 +1324,11 @@ def : Pat < /********** ===================== **********/ def : Pat < + (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params), + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, SReg_32:$params) +>; + +def : Pat < (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params), (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan, imm:$attr, SReg_32:$params) -- cgit v1.1 From 7b672ed380cf44894f8b96c52558dcfc136af383 Mon Sep 17 00:00:00 2001 From: Kay Tiong Khoo Date: Thu, 14 Feb 2013 19:08:21 +0000 Subject: added basic support for Intel ADX instructions -feature flag, instructions definitions, test cases git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175196 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 2 ++ lib/Target/X86/X86InstrArithmetic.td | 46 ++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86Subtarget.cpp | 1 + lib/Target/X86/X86Subtarget.h | 4 ++++ 5 files changed, 54 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3ab2899..0216252 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,6 +120,8 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; +def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", + "Support ADX instructions">; def FeatureLeaForSP : 
SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 7db611c..d86a406 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1256,3 +1256,49 @@ let Predicates = [HasBMI2] in { let Uses = [RDX] in defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W; } + +//===----------------------------------------------------------------------===// +// ADCX Instruction +// +let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "adcx{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8, OpSize; + + def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "adcx{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + + let mayLoad = 1 in { + def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "adcx{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8, OpSize; + + def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "adcx{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } +} + +//===----------------------------------------------------------------------===// +// ADOX Instruction +// +let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "adox{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8XS; + + def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "adox{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + + let mayLoad = 1 in { + def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "adox{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8XS; + + def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "adox{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } +} diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0d32506..84c278c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -603,6 +603,7 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; +def HasADX : Predicate<"Subtarget->hasADX()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 6305501..b8ecf44 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -349,6 +349,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasBMI(false) , HasBMI2(false) , HasRTM(false) + , HasADX(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index eb587a5..1466041 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -121,6 +121,9 @@ protected: /// HasRTM - Processor has RTM instructions. bool HasRTM; + /// HasADX - Processor has ADX instructions. 
+ bool HasADX; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -242,6 +245,7 @@ public: bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasRTM() const { return HasRTM; } + bool hasADX() const { return HasADX; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } -- cgit v1.1 From 31cfc707058ad3f470924cdb3c460c8f50ee76c3 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 14 Feb 2013 19:11:28 +0000 Subject: Add two new functions to the C API: LLVMCreateMemoryBufferWithMemoryRange - exposes MemoryBuffer::getMemBuffer LLVMCreateMemoryBufferWithMemoryRangeCopy - exposes MemoryBuffer::getMemBufferCopy Patch by Moritz Maxeiner! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175199 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Core.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'lib') diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index b696ed0..10f870c 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2369,6 +2369,29 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, return 1; } +LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange( + const char *InputData, + size_t InputDataLength, + const char *BufferName, + bool RequiresNullTerminator) { + + return wrap(MemoryBuffer::getMemBuffer( + StringRef(InputData, InputDataLength), + StringRef(BufferName), + RequiresNullTerminator)); +} + +LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy( + const char *InputData, + size_t InputDataLength, + const char *BufferName) { + + return wrap(MemoryBuffer::getMemBufferCopy( + StringRef(InputData, InputDataLength), + StringRef(BufferName))); +} + + void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) { delete unwrap(MemBuf); } -- cgit v1.1 From aefd14be3951d5b84cda5561655fe63549819d25 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 14 Feb 2013 19:40:27 +0000 Subject: s/bool/LLVMBool/ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175204 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 10f870c..79eb269 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2373,7 +2373,7 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange( const char *InputData, size_t InputDataLength, const char *BufferName, - bool RequiresNullTerminator) { + LLVMBool RequiresNullTerminator) { return wrap(MemoryBuffer::getMemBuffer( StringRef(InputData, InputDataLength), -- cgit v1.1 From 84a2c2bbb58dd0f91608d8d14b3139b9295ca036 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Thu, 14 Feb 2013 19:57:17 +0000 Subject: Hexagon: Change insn class to support instruction encoding. This patch doesn't introduce any functionality changes. It adds some new fields to the Hexagon instruction classes and changes their layout to support instruction encoding. 
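To make the new layout concrete, here is a hedged sketch of how the repacked low TSFlags bits would be decoded; the bit positions match the TSFlags assignments in the diff that follows, but these helpers themselves are illustrative and not part of the patch:

    #include <cstdint>

    // Bits 4-0: instruction type (IType); bit 5: solo; bit 6: predicated;
    // bit 7: predicated-false; bit 8: predicated-new.
    static inline unsigned insnType(uint64_t TSFlags)        { return TSFlags & 0x1f; }
    static inline bool isSolo(uint64_t TSFlags)              { return (TSFlags >> 5) & 1; }
    static inline bool isPredicated(uint64_t TSFlags)        { return (TSFlags >> 6) & 1; }
    static inline bool isPredicatedNew(uint64_t TSFlags)     { return (TSFlags >> 8) & 1; }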
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175205 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrFormats.td | 412 +++++++++++++--------------- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 53 ++-- lib/Target/Hexagon/HexagonInstrInfo.td | 8 +- lib/Target/Hexagon/HexagonSchedule.td | 24 +- lib/Target/Hexagon/HexagonScheduleV4.td | 14 +- 5 files changed, 252 insertions(+), 259 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 71c620b..587fa7d 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -13,19 +13,19 @@ // *** Must match HexagonBaseInfo.h *** //===----------------------------------------------------------------------===// -class Type t> { +class IType t> { bits<5> Value = t; } -def TypePSEUDO : Type<0>; -def TypeALU32 : Type<1>; -def TypeCR : Type<2>; -def TypeJR : Type<3>; -def TypeJ : Type<4>; -def TypeLD : Type<5>; -def TypeST : Type<6>; -def TypeSYSTEM : Type<7>; -def TypeXTYPE : Type<8>; -def TypeMARKER : Type<31>; +def TypePSEUDO : IType<0>; +def TypeALU32 : IType<1>; +def TypeCR : IType<2>; +def TypeJR : IType<3>; +def TypeJ : IType<4>; +def TypeLD : IType<5>; +def TypeST : IType<6>; +def TypeSYSTEM : IType<7>; +def TypeXTYPE : IType<8>; +def TypeENDLOOP: IType<31>; // Maintain list of valid subtargets for each instruction. class SubTarget value> { @@ -44,8 +44,8 @@ def HasV5SubT : SubTarget<0x8>; def NoV5SubT : SubTarget<0x7>; // Addressing modes for load/store instructions -class AddrModeType value> { - bits<4> Value = value; +class AddrModeType value> { + bits<3> Value = value; } def NoAddrMode : AddrModeType<0>; // No addressing mode @@ -55,14 +55,35 @@ def BaseImmOffset : AddrModeType<3>; // Indirect with offset def BaseLongOffset : AddrModeType<4>; // Indirect with long offset def BaseRegOffset : AddrModeType<5>; // Indirect with register offset +class MemAccessSize value> { + bits<3> Value = value; +} + +def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction. +def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb). +def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh). +def WordAccess : MemAccessSize<3>;// Word access instrution (memw). +def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) + + //===----------------------------------------------------------------------===// // Intruction Class Declaration + //===----------------------------------------------------------------------===// -class InstHexagon pattern, - string cstr, InstrItinClass itin, Type type> : Instruction { - field bits<32> Inst; +class OpcodeHexagon { + field bits<32> Inst = ?; // Default to an invalid insn. + bits<4> IClass = 0; // ICLASS + bits<2> IParse = 0; // Parse bits. + + let Inst{31-28} = IClass; + let Inst{15-14} = IParse; + + bits<1> zero = 0; +} +class InstHexagon pattern, + string cstr, InstrItinClass itin, IType type> + : Instruction, OpcodeHexagon { let Namespace = "Hexagon"; dag OutOperandList = outs; @@ -73,48 +94,63 @@ class InstHexagon pattern, let Itinerary = itin; let Size = 4; - // *** Must match HexagonBaseInfo.h *** + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + // Instruction type according to the ISA. - Type HexagonType = type; - let TSFlags{4-0} = HexagonType.Value; + IType Type = type; + let TSFlags{4-0} = Type.Value; + // Solo instructions, i.e., those that cannot be in a packet with others. 
- bits<1> isHexagonSolo = 0; - let TSFlags{5} = isHexagonSolo; + bits<1> isSolo = 0; + let TSFlags{5} = isSolo; + // Predicated instructions. bits<1> isPredicated = 0; let TSFlags{6} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{7} = isPredicatedFalse; bits<1> isPredicatedNew = 0; - let TSFlags{7} = isPredicatedNew; - - // Stores that can be newified. + let TSFlags{8} = isPredicatedNew; + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{9} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{10} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{13-11} = opNewValue; // New-value produced operand. + bits<2> opNewBits = 0; + let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16. bits<1> isNVStorable = 0; - let TSFlags{8} = isNVStorable; - - // New-value store instructions. + let TSFlags{16} = isNVStorable; // Store that can become new-value store. bits<1> isNVStore = 0; - let TSFlags{9} = isNVStore; + let TSFlags{17} = isNVStore; // New-value store insn. // Immediate extender helper fields. bits<1> isExtendable = 0; - let TSFlags{10} = isExtendable; // Insn may be extended. + let TSFlags{18} = isExtendable; // Insn may be extended. bits<1> isExtended = 0; - let TSFlags{11} = isExtended; // Insn must be extended. + let TSFlags{19} = isExtended; // Insn must be extended. bits<3> opExtendable = 0; - let TSFlags{14-12} = opExtendable; // Which operand may be extended. + let TSFlags{22-20} = opExtendable; // Which operand may be extended. bits<1> isExtentSigned = 0; - let TSFlags{15} = isExtentSigned; // Signed or unsigned range. + let TSFlags{23} = isExtentSigned; // Signed or unsigned range. bits<5> opExtentBits = 0; - let TSFlags{20-16} = opExtentBits; //Number of bits of range before extending. + let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending. // If an instruction is valid on a subtarget (v2-v5), set the corresponding // bit from validSubTargets. v2 is the least significant bit. // By default, instruction is valid on all subtargets. SubTarget validSubTargets = HasV2SubT; - let TSFlags{24-21} = validSubTargets.Value; + let TSFlags{32-29} = validSubTargets.Value; - // Addressing mode for load/store instrutions. + // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; - let TSFlags{28-25} = addrMode.Value; + let TSFlags{35-33} = addrMode.Value; + + // Memory access size for mem access instructions (load/store) + MemAccessSize accessSize = NoMemAccess; + let TSFlags{38-36} = accessSize.Value; // Fields used for relation models. string BaseOpcode = ""; @@ -124,6 +160,11 @@ class InstHexagon pattern, string InputType = ""; // Input is "imm" or "reg" type. string isMEMri = "false"; // Set to "true" for load/store with MEMri operand. string isFloat = "false"; // Set to "true" for the floating-point load/store. + string isBrTaken = ""; // Set to "true"/"false" for jump instructions + + let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"), + ""); + let PNewValue = !if(isPredicatedNew, "new", ""); // *** Must match MCTargetDesc/HexagonBaseInfo.h *** } @@ -134,187 +175,143 @@ class InstHexagon pattern, // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
-class LDInst pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +class LDInst pattern = [], + string cstr = ""> + : InstHexagon; -class LDInst2 pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<13> imm13; - let mayLoad = 1; -} +let mayLoad = 1 in +class LDInst2 pattern = [], + string cstr = ""> + : LDInst; + +class CONSTLDInst pattern = [], + string cstr = ""> + : LDInst; // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class LDInstPost pattern, - string cstr> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<13> imm13; -} +class LDInstPost pattern = [], + string cstr = ""> + : LDInst; + +let mayLoad = 1 in +class LD0Inst pattern = [], + string cstr = ""> + : LDInst; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. -class STInst pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +let mayStore = 1 in +class STInst pattern = [], + string cstr = ""> + : InstHexagon; -class STInst2 pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<13> imm13; - let mayStore = 1; -} +class STInst2 pattern = [], + string cstr = ""> + : STInst; -// SYSTEM Instruction Class in V4 can take SLOT0 only -// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. -class SYSInst pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +let mayStore = 1 in +class ST0Inst pattern = [], + string cstr = ""> + : InstHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. -class STInstPost pattern, - string cstr> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<13> imm13; -} +class STInstPost pattern = [], + string cstr = ""> + : STInst; + +// SYSTEM Instruction Class in V4 can take SLOT0 only +// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. +class SYSInst pattern = [], + string cstr = ""> + : InstHexagon; // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class ALU32Type pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<16> imm16; - bits<16> imm16_2; -} +class ALU32Inst pattern = [], + string cstr = ""> + : InstHexagon; // ALU64 Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. -class ALU64Type pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<16> imm16; - bits<16> imm16_2; -} +class ALU64Inst pattern = [], + string cstr = ""> + : InstHexagon; + +class ALU64_acc pattern = [], + string cstr = ""> + : ALU64Inst; -class ALU64_acc pattern, - string cstr> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<16> imm16; - bits<16> imm16_2; -} // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. -class MInst pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class MInst pattern = [], + string cstr = ""> + : InstHexagon; // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. 
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. -class MInst_acc pattern, - string cstr> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class MInst_acc pattern = [], + string cstr = ""> + : MInst; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. -class SInst pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class SInst pattern = [], + string cstr = ""> + : InstHexagon; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. -class SInst_acc pattern, - string cstr> - : InstHexagon { -// : InstHexagon { -// : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class SInst_acc pattern = [], + string cstr = ""> + : SInst; // J Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class JType pattern> - : InstHexagon { - bits<16> imm16; -} +class JInst pattern = [], + string cstr = ""> + : InstHexagon; // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class JRType pattern> - : InstHexagon { - bits<5> rs; - bits<5> pu; // Predicate register -} +class JRInst pattern = [], + string cstr = ""> + : InstHexagon; // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class CRInst pattern> - : InstHexagon { - bits<5> rs; - bits<10> imm10; -} +class CRInst pattern = [], + string cstr = ""> + : InstHexagon; -class Marker pattern> - : InstHexagon { - let isCodeGenOnly = 1; - let isPseudo = 1; -} +let isCodeGenOnly = 1, isPseudo = 1 in +class Endloop pattern = [], + string cstr = ""> + : InstHexagon; -class Pseudo pattern> - : InstHexagon { - let isCodeGenOnly = 1; - let isPseudo = 1; -} +let isCodeGenOnly = 1, isPseudo = 1 in +class Pseudo pattern = [], + string cstr = ""> + : InstHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class PseudoM pattern = [], + string cstr=""> + : InstHexagon; //===----------------------------------------------------------------------===// // Intruction Classes Definitions - @@ -324,75 +321,52 @@ class Pseudo pattern> // // ALU32 patterns //. -class ALU32_rr pattern> - : ALU32Type { -} +class ALU32_rr pattern, + string cstr = ""> + : ALU32Inst; -class ALU32_ir pattern> - : ALU32Type { - let rt{0-4} = 0; -} +class ALU32_ir pattern, + string cstr = ""> + : ALU32Inst; -class ALU32_ri pattern> - : ALU32Type { - let rt{0-4} = 0; -} +class ALU32_ri pattern, + string cstr = ""> + : ALU32Inst; -class ALU32_ii pattern> - : ALU32Type { - let rt{0-4} = 0; -} +class ALU32_ii pattern, + string cstr = ""> + : ALU32Inst; // // ALU64 patterns. // -class ALU64_rr pattern> - : ALU64Type { -} - -class ALU64_ri pattern> - : ALU64Type { - let rt{0-4} = 0; -} - -// J Type Instructions. -class JInst pattern> - : JType { -} - -// JR type Instructions. -class JRInst pattern> - : JRType { -} +class ALU64_rr pattern, + string cstr = ""> + : ALU64Inst; +class ALU64_ri pattern, + string cstr = ""> + : ALU64Inst; // Post increment ST Instruction. 
-class STInstPI pattern, - string cstr> - : STInstPost { - let rt{0-4} = 0; -} +class STInstPI pattern = [], + string cstr = ""> + : STInst; -class STInst2PI pattern, - string cstr> - : STInstPost { - let rt{0-4} = 0; - let mayStore = 1; -} +let mayStore = 1 in +class STInst2PI pattern = [], + string cstr = ""> + : STInst; // Post increment LD Instruction. -class LDInstPI pattern, - string cstr> - : LDInstPost { - let rt{0-4} = 0; -} - -class LDInst2PI pattern, - string cstr> - : LDInstPost { - let rt{0-4} = 0; - let mayLoad = 1; -} +class LDInstPI pattern = [], + string cstr = ""> + : LDInst; + +let mayLoad = 1 in +class LDInst2PI pattern = [], + string cstr = ""> + : LDInst; //===----------------------------------------------------------------------===// // V4 Instruction Format Definitions + diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 29973e7..9fda0da 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -17,9 +17,9 @@ // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// -def TypeMEMOP : Type<9>; -def TypeNV : Type<10>; -def TypePREFIX : Type<30>; +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypePREFIX : IType<30>; //----------------------------------------------------------------------------// // Intruction Classes Definitions + @@ -28,35 +28,38 @@ def TypePREFIX : Type<30>; // // NV type instructions. // -class NVInst_V4 pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +class NVInst pattern = [], + string cstr = ""> + : InstHexagon; + +class NVInst_V4 pattern = [], + string cstr = ""> + : NVInst; // Definition of Post increment new value store. -class NVInstPost_V4 pattern, - string cstr> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<13> imm13; -} +class NVInstPost_V4 pattern = [], + string cstr = ""> + : NVInst; // Post increment ST Instruction. let mayStore = 1 in class NVInstPI_V4 pattern = [], string cstr = ""> - : NVInstPost_V4; - -class MEMInst_V4 pattern> - : InstHexagon { - bits<5> rd; - bits<5> rs; - bits<6> imm6; -} + : NVInst; + +// New-value conditional branch. +class NCJInst pattern = [], + string cstr = ""> + : NVInst; + +let mayLoad = 1, mayStore = 1 in +class MEMInst pattern = [], + string cstr = ""> + : InstHexagon; + +class MEMInst_V4 pattern = [], + string cstr = ""> + : MEMInst; let isCodeGenOnly = 1 in class EXTENDERInst pattern = []> diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index ac2dd22..082772a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -1885,7 +1885,7 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>; def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER, [SDNPHasChain]>; -let hasSideEffects = 1, isHexagonSolo = 1 in +let hasSideEffects = 1, isSolo = 1 in def BARRIER : SYSInst<(outs), (ins), "barrier", [(HexagonBARRIER)]>; @@ -1960,9 +1960,9 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2), let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC, LC0], Uses = [SA0, LC0] in { -def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset), - ":endloop0", - []>; +def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset), + ":endloop0", + []>; } // Support for generating global address. 
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index b5ff69a..c2cfbb9 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -8,10 +8,11 @@ //===----------------------------------------------------------------------===// // Functional Units -def LUNIT : FuncUnit; -def LSUNIT : FuncUnit; -def MUNIT : FuncUnit; -def SUNIT : FuncUnit; +def LSUNIT : FuncUnit; // SLOT0 +def LUNIT : FuncUnit; // SLOT1 +def MUNIT : FuncUnit; // SLOT2 +def SUNIT : FuncUnit; // SLOT3 +def LOOPUNIT : FuncUnit; // Itinerary classes def ALU32 : InstrItinClass; @@ -20,27 +21,34 @@ def CR : InstrItinClass; def J : InstrItinClass; def JR : InstrItinClass; def LD : InstrItinClass; +def LD0 : InstrItinClass; def M : InstrItinClass; def ST : InstrItinClass; +def ST0 : InstrItinClass; def S : InstrItinClass; def SYS : InstrItinClass; -def MARKER : InstrItinClass; +def ENDLOOP : InstrItinClass; def PSEUDO : InstrItinClass; +def PSEUDOM : InstrItinClass; def HexagonItineraries : - ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [ + ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [MUNIT, SUNIT]>]> ]>; def HexagonModel : SchedMachineModel { diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 5668ae8..ef72cf4 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -28,6 +28,10 @@ def SLOT0 : FuncUnit; def SLOT1 : FuncUnit; def SLOT2 : FuncUnit; def SLOT3 : FuncUnit; +// Endloop is a pseudo instruction that is encoded with 2 bits in a packet +// rather than taking an execution slot. This special unit is needed +// to schedule an ENDLOOP with 4 other instructions. +def SLOT_ENDLOOP: FuncUnit; // Itinerary classes. def NV_V4 : InstrItinClass; @@ -36,22 +40,26 @@ def MEM_V4 : InstrItinClass; def PREFIX : InstrItinClass; def HexagonItinerariesV4 : - ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [ + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, + InstrItinData]>, InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>]> ]>; def HexagonModelV4 : SchedMachineModel { -- cgit v1.1 From 27d844f4d0fa82e89bd0a1d10b477338f970b8f7 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 14 Feb 2013 20:44:07 +0000 Subject: Update comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175209 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index c4b2c0f..86e2fd9 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2312,7 +2312,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return false; case lltok::kw_asm: { - // ValID ::= 'asm' SideEffect? AlignStack? 
STRINGCONSTANT ',' STRINGCONSTANT + // ValID ::= 'asm' SideEffect? AlignStack? IntelDialect? STRINGCONSTANT ',' + // STRINGCONSTANT bool HasSideEffect, AlignStack, AsmDialect; Lex.Lex(); if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || -- cgit v1.1 From fe9b5a4f74c5686ab479dd42bd87e654834a8ba4 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Thu, 14 Feb 2013 21:50:09 +0000 Subject: Simplify code. Remove "else after return". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175212 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b6c1512..7f230ff 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -326,12 +326,11 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src, /*TODO: Kill=*/false); - - if (RR != 0) { - ResultReg = RR; - return true; - } else + if (RR == 0) return false; + + ResultReg = RR; + return true; } /// X86SelectAddress - Attempt to fill in an address from the given value. -- cgit v1.1 From 6ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 14 Feb 2013 22:37:09 +0000 Subject: BBVectorize: Don't store candidate pairs in a std::multimap This is another commit on the road to removing std::multimap from BBVectorize. This gives an ~1% speedup on the csa.ll test case in PR15222. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175215 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 152 +++++++++++++++++++------------ 1 file changed, 92 insertions(+), 60 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index c11d9f6..1b6e987 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -225,7 +225,7 @@ namespace { bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &FixedOrderPairs, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, bool NonPow2Len); @@ -239,18 +239,18 @@ namespace { PairConnectionSplat }; - void computeConnectedPairs(std::multimap &CandidatePairs, + void computeConnectedPairs(DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes); void buildDepMap(BasicBlock &BB, - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, std::vector &PairableInsts, DenseSet &PairableInstUsers); - void choosePairs(std::multimap &CandidatePairs, + void choosePairs(DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, @@ -282,7 +282,7 @@ namespace { DenseSet *LoadMoveSetPairs = 0); void computePairsConnectedTo( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, @@ -299,7 +299,7 @@ namespace { DenseSet &CurrentPairs); void pruneTreeFor( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseSet &PairableInstUsers, @@ -311,7 +311,7 @@ namespace { bool UseCycleCheck); void 
buildInitialTreeFor( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, @@ -320,7 +320,7 @@ namespace { DenseMap &Tree, ValuePair J); void findBestTreeFor( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, @@ -333,7 +333,7 @@ namespace { DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, DenseSet &BestTree, size_t &BestMaxDepth, - int &BestEffSize, VPIteratorPair ChoiceRange, + int &BestEffSize, Value *II, std::vector&JJ, bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, @@ -699,7 +699,7 @@ namespace { do { std::vector PairableInsts; - std::multimap CandidatePairs; + DenseMap > CandidatePairs; DenseSet FixedOrderPairs; DenseMap CandidatePairCostSavings; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, @@ -710,9 +710,11 @@ namespace { // Build the candidate pair set for faster lookups. DenseSet CandidatePairsSet; - for (std::multimap::iterator I = CandidatePairs.begin(), - E = CandidatePairs.end(); I != E; ++I) - CandidatePairsSet.insert(*I); + for (DenseMap >::iterator I = + CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I) + for (std::vector::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + CandidatePairsSet.insert(ValuePair(I->first, *J)); // Now we have a map of all of the pairable instructions and we need to // select the best possible pairing. A good pairing is one such that the @@ -1158,7 +1160,7 @@ namespace { // basic block and collects all candidate pairs for vectorization. bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &FixedOrderPairs, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, bool NonPow2Len) { @@ -1207,7 +1209,7 @@ namespace { PairableInsts.push_back(I); } - CandidatePairs.insert(ValuePair(I, J)); + CandidatePairs[I].push_back(J); if (TTI) CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), CostSavings)); @@ -1251,7 +1253,7 @@ namespace { // it looks for pairs such that both members have an input which is an // output of PI or PJ. void BBVectorize::computePairsConnectedTo( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, @@ -1342,20 +1344,23 @@ namespace { // connected if some output of the first pair forms an input to both members // of the second pair. 
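// (Illustrative aside, not part of the patch: the three connection kinds
// recorded in PairConnectionTypes. For a producer pair (PI, PJ) feeding a
// candidate user pair:
//   PairConnectionDirect - the first user reads PI and the second reads PJ,
//                          so the fused lanes line up as-is;
//   PairConnectionSwap   - the users read the producers crosswise, so fusing
//                          requires swapping operands;
//   PairConnectionSplat  - both users read the same producer value, so one
//                          lane must be broadcast to feed both.)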
void BBVectorize::computeConnectedPairs( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseMap &PairConnectionTypes) { for (std::vector::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { - VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); + DenseMap >::iterator PP = + CandidatePairs.find(*PI); + if (PP == CandidatePairs.end()) + continue; - for (std::multimap::iterator P = choiceRange.first; - P != choiceRange.second; ++P) + for (std::vector::iterator P = PP->second.begin(), + E = PP->second.end(); P != E; ++P) computePairsConnectedTo(CandidatePairs, CandidatePairsSet, PairableInsts, ConnectedPairs, - PairConnectionTypes, *P); + PairConnectionTypes, ValuePair(*PI, *P)); } DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() @@ -1367,14 +1372,14 @@ namespace { // depends on the output of A. void BBVectorize::buildDepMap( BasicBlock &BB, - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, std::vector &PairableInsts, DenseSet &PairableInstUsers) { DenseSet IsInPair; - for (std::multimap::iterator C = CandidatePairs.begin(), - E = CandidatePairs.end(); C != E; ++C) { + for (DenseMap >::iterator C = + CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) { IsInPair.insert(C->first); - IsInPair.insert(C->second); + IsInPair.insert(C->second.begin(), C->second.end()); } // Iterate through the basic block, recording all users of each @@ -1481,7 +1486,7 @@ namespace { // This function builds the initial tree of connected pairs with the // pair J at the root. void BBVectorize::buildInitialTreeFor( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, std::multimap &ConnectedPairs, @@ -1527,7 +1532,7 @@ namespace { // Given some initial tree, prune it by removing conflicting pairs (pairs // that cannot be simultaneously chosen for vectorization). void BBVectorize::pruneTreeFor( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, std::vector &PairableInsts, std::multimap &ConnectedPairs, DenseSet &PairableInstUsers, @@ -1693,7 +1698,7 @@ namespace { // This function finds the best tree of mututally-compatible connected // pairs, given the choice of root pairs as an iterator range. void BBVectorize::findBestTreeFor( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, @@ -1706,10 +1711,13 @@ namespace { DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, DenseSet &BestTree, size_t &BestMaxDepth, - int &BestEffSize, VPIteratorPair ChoiceRange, + int &BestEffSize, Value *II, std::vector&JJ, bool UseCycleCheck) { - for (std::multimap::iterator J = ChoiceRange.first; - J != ChoiceRange.second; ++J) { + for (std::vector::iterator J = JJ.begin(), JE = JJ.end(); + J != JE; ++J) { + ValuePair IJ(II, *J); + if (!CandidatePairsSet.count(IJ)) + continue; // Before going any further, make sure that this pair does not // conflict with any already-selected pairs (see comment below @@ -1718,7 +1726,7 @@ namespace { bool DoesConflict = false; for (DenseMap::iterator C = ChosenPairs.begin(), E = ChosenPairs.end(); C != E; ++C) { - if (pairsConflict(*C, *J, PairableInstUsers, + if (pairsConflict(*C, IJ, PairableInstUsers, UseCycleCheck ? &PairableInstUserMap : 0, UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { DoesConflict = true; @@ -1730,20 +1738,20 @@ namespace { if (DoesConflict) continue; if (UseCycleCheck && - pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet)) + pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet)) continue; DenseMap Tree; buildInitialTreeFor(CandidatePairs, CandidatePairsSet, PairableInsts, ConnectedPairs, - PairableInstUsers, ChosenPairs, Tree, *J); + PairableInstUsers, ChosenPairs, Tree, IJ); // Because we'll keep the child with the largest depth, the largest // depth is still the same in the unpruned Tree. - size_t MaxDepth = Tree.lookup(*J); + size_t MaxDepth = Tree.lookup(IJ); DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" - << *J->first << " <-> " << *J->second << "} of depth " << + << IJ.first << " <-> " << IJ.second << "} of depth " << MaxDepth << " and size " << Tree.size() << "\n"); // At this point the Tree has been constructed, but, may contain @@ -1757,7 +1765,7 @@ namespace { pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, PairableInstUsers, PairableInstUserMap, PairableInstUserPairSet, - ChosenPairs, Tree, PrunedTree, *J, UseCycleCheck); + ChosenPairs, Tree, PrunedTree, IJ, UseCycleCheck); int EffSize = 0; if (TTI) { @@ -2055,7 +2063,7 @@ namespace { DEBUG(if (DebugPairSelection) dbgs() << "BBV: found pruned Tree for pair {" - << *J->first << " <-> " << *J->second << "} of depth " << + << IJ.first << " <-> " << IJ.second << "} of depth " << MaxDepth << " and size " << PrunedTree.size() << " (effective size: " << EffSize << ")\n"); if (((TTI && !UseChainDepthWithTI) || @@ -2071,7 +2079,7 @@ namespace { // Given the list of candidate pairs, this function selects those // that will be fused into vector instructions. void BBVectorize::choosePairs( - std::multimap &CandidatePairs, + DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, @@ -2082,16 +2090,25 @@ namespace { DenseSet &PairableInstUsers, DenseMap& ChosenPairs) { bool UseCycleCheck = - CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; + CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck; + + DenseMap > CandidatePairs2; + for (DenseSet::iterator I = CandidatePairsSet.begin(), + E = CandidatePairsSet.end(); I != E; ++I) { + std::vector &JJ = CandidatePairs2[I->second]; + if (JJ.empty()) JJ.reserve(32); + JJ.push_back(I->first); + } + std::multimap PairableInstUserMap; DenseSet PairableInstUserPairSet; for (std::vector::iterator I = PairableInsts.begin(), E = PairableInsts.end(); I != E; ++I) { // The number of possible pairings for this variable: - size_t NumChoices = CandidatePairs.count(*I); + size_t NumChoices = CandidatePairs.lookup(*I).size(); if (!NumChoices) continue; - VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); + std::vector &JJ = CandidatePairs[*I]; // The best pair to choose and its tree: size_t BestMaxDepth = 0; @@ -2103,16 +2120,18 @@ namespace { ConnectedPairs, ConnectedPairDeps, PairableInstUsers, PairableInstUserMap, PairableInstUserPairSet, ChosenPairs, - BestTree, BestMaxDepth, BestEffSize, ChoiceRange, + BestTree, BestMaxDepth, BestEffSize, *I, JJ, UseCycleCheck); + if (BestTree.empty()) + continue; + // A tree has been chosen (or not) at this point. If no tree was // chosen, then this instruction, I, cannot be paired (and is no longer // considered). 
- DEBUG(if (BestTree.size() > 0) - dbgs() << "BBV: selected pairs in the best tree for: " - << *cast(*I) << "\n"); + DEBUG(dbgs() << "BBV: selected pairs in the best tree for: " + << *cast(*I) << "\n"); for (DenseSet::iterator S = BestTree.begin(), SE2 = BestTree.end(); S != SE2; ++S) { @@ -2122,20 +2141,33 @@ namespace { *S->second << "\n"); // Remove all candidate pairs that have values in the chosen tree. - for (std::multimap::iterator K = - CandidatePairs.begin(); K != CandidatePairs.end();) { - if (K->first == S->first || K->second == S->first || - K->second == S->second || K->first == S->second) { - // Don't remove the actual pair chosen so that it can be used - // in subsequent tree selections. - if (!(K->first == S->first && K->second == S->second)) { - CandidatePairsSet.erase(*K); - CandidatePairs.erase(K++); - } else - ++K; - } else { - ++K; - } + std::vector &KK = CandidatePairs[S->first], + &LL = CandidatePairs2[S->second], + &MM = CandidatePairs[S->second], + &NN = CandidatePairs2[S->first]; + for (std::vector::iterator K = KK.begin(), KE = KK.end(); + K != KE; ++K) { + if (*K == S->second) + continue; + + CandidatePairsSet.erase(ValuePair(S->first, *K)); + } + for (std::vector::iterator L = LL.begin(), LE = LL.end(); + L != LE; ++L) { + if (*L == S->first) + continue; + + CandidatePairsSet.erase(ValuePair(*L, S->second)); + } + for (std::vector::iterator M = MM.begin(), ME = MM.end(); + M != ME; ++M) { + assert(*M != S->first && "Flipped pair in candidate list?"); + CandidatePairsSet.erase(ValuePair(S->second, *M)); + } + for (std::vector::iterator N = NN.begin(), NE = NN.end(); + N != NE; ++N) { + assert(*N != S->second && "Flipped pair in candidate list?"); + CandidatePairsSet.erase(ValuePair(*N, S->first)); } } } -- cgit v1.1 From 97a241b173a1413df5a93fdd891ddfac36dabad9 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 14 Feb 2013 22:38:04 +0000 Subject: BBVectorize: Remove the remaining instances of std::multimap All instances of std::multimap have now been replaced by DenseMap >, and this yields a speedup of 5% on the csa.ll test case from PR15222. No functionality change intended. 
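The swap has the same shape at every site in the diff that follows: the edge
lists move out of std::multimap into a map from each value to the vector of
its partners (DenseMap<Value *, std::vector<Value *> > in the pass itself),
with a flat DenseSet of pairs kept alongside for constant-time membership
tests. A minimal self-contained sketch of the pattern -- the key type and
helper names here are illustrative, not taken from BBVectorize:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include <utility>
#include <vector>

typedef int *Key;  // stand-in for the pass's Value *
typedef std::pair<Key, Key> KeyPair;
typedef llvm::DenseMap<Key, std::vector<Key> > PairMap;

// Record the candidate pair (A, B): the map answers "all partners of A"
// with one hash probe, the set answers "is (A, B) a candidate?" in O(1).
static void addPair(PairMap &Map, llvm::DenseSet<KeyPair> &Set,
                    Key A, Key B) {
  Map[A].push_back(B);        // replaces std::multimap::insert
  Set.insert(KeyPair(A, B));  // replaces counting/scanning the multimap
}

// Visit every partner of A; replaces iterating multimap::equal_range(A).
static void forEachPartner(const PairMap &Map, Key A, void (*Visit)(Key)) {
  PairMap::const_iterator I = Map.find(A);
  if (I == Map.end())
    return;
  for (std::vector<Key>::const_iterator J = I->second.begin(),
       JE = I->second.end(); J != JE; ++J)
    Visit(*J);
}

Note that selection then prunes the flat membership set rather than the map
itself, as in the loops at the end of choosePairs above.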
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175216 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 487 ++++++++++++++++--------------- 1 file changed, 256 insertions(+), 231 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 1b6e987..37638ca 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -48,7 +48,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include -#include using namespace llvm; static cl::opt @@ -207,11 +206,6 @@ namespace { typedef std::pair ValuePairWithDepth; typedef std::pair VPPair; // A ValuePair pair typedef std::pair VPPairWithType; - typedef std::pair::iterator, - std::multimap::iterator> VPIteratorPair; - typedef std::pair::iterator, - std::multimap::iterator> - VPPIteratorPair; AliasAnalysis *AA; DominatorTree *DT; @@ -239,35 +233,36 @@ namespace { PairConnectionSplat }; - void computeConnectedPairs(DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseMap &PairConnectionTypes); + void computeConnectedPairs( + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseMap &PairConnectionTypes); void buildDepMap(BasicBlock &BB, - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - DenseSet &PairableInstUsers); + DenseMap > &CandidatePairs, + std::vector &PairableInsts, + DenseSet &PairableInstUsers); void choosePairs(DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - std::multimap &ConnectedPairs, - std::multimap &ConnectedPairDeps, - DenseSet &PairableInstUsers, - DenseMap& ChosenPairs); + DenseSet &CandidatePairsSet, + DenseMap &CandidatePairCostSavings, + std::vector &PairableInsts, + DenseSet &FixedOrderPairs, + DenseMap &PairConnectionTypes, + DenseMap > &ConnectedPairs, + DenseMap > &ConnectedPairDeps, + DenseSet &PairableInstUsers, + DenseMap& ChosenPairs); void fuseChosenPairs(BasicBlock &BB, - std::vector &PairableInsts, - DenseMap& ChosenPairs, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - std::multimap &ConnectedPairs, - std::multimap &ConnectedPairDeps); + std::vector &PairableInsts, + DenseMap& ChosenPairs, + DenseSet &FixedOrderPairs, + DenseMap &PairConnectionTypes, + DenseMap > &ConnectedPairs, + DenseMap > &ConnectedPairDeps); bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); @@ -281,60 +276,61 @@ namespace { Instruction *J, bool UpdateUsers = true, DenseSet *LoadMoveSetPairs = 0); - void computePairsConnectedTo( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseMap &PairConnectionTypes, - ValuePair P); + void computePairsConnectedTo( + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseMap &PairConnectionTypes, + ValuePair P); bool pairsConflict(ValuePair P, ValuePair Q, - DenseSet &PairableInstUsers, - std::multimap *PairableInstUserMap = 0, - DenseSet *PairableInstUserPairSet = 0); + DenseSet &PairableInstUsers, + DenseMap > + *PairableInstUserMap = 0, + DenseSet *PairableInstUserPairSet = 0); bool pairWillFormCycle(ValuePair P, - std::multimap 
&PairableInstUsers, - DenseSet &CurrentPairs); + DenseMap > &PairableInstUsers, + DenseSet &CurrentPairs); void pruneTreeFor( - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseSet &PairableInstUsers, - std::multimap &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseMap &Tree, - DenseSet &PrunedTree, ValuePair J, - bool UseCycleCheck); + DenseMap > &CandidatePairs, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseSet &PairableInstUsers, + DenseMap > &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, + DenseMap &ChosenPairs, + DenseMap &Tree, + DenseSet &PrunedTree, ValuePair J, + bool UseCycleCheck); void buildInitialTreeFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseSet &PairableInstUsers, - DenseMap &ChosenPairs, - DenseMap &Tree, ValuePair J); + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseSet &PairableInstUsers, + DenseMap &ChosenPairs, + DenseMap &Tree, ValuePair J); void findBestTreeFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - std::multimap &ConnectedPairs, - std::multimap &ConnectedPairDeps, - DenseSet &PairableInstUsers, - std::multimap &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseSet &BestTree, size_t &BestMaxDepth, - int &BestEffSize, Value *II, std::vector&JJ, - bool UseCycleCheck); + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + DenseMap &CandidatePairCostSavings, + std::vector &PairableInsts, + DenseSet &FixedOrderPairs, + DenseMap &PairConnectionTypes, + DenseMap > &ConnectedPairs, + DenseMap > &ConnectedPairDeps, + DenseSet &PairableInstUsers, + DenseMap > &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, + DenseMap &ChosenPairs, + DenseSet &BestTree, size_t &BestMaxDepth, + int &BestEffSize, Value *II, std::vector&JJ, + bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o); @@ -366,14 +362,14 @@ namespace { void collectPairLoadMoveSet(BasicBlock &BB, DenseMap &ChosenPairs, - std::multimap &LoadMoveSet, + DenseMap > &LoadMoveSet, DenseSet &LoadMoveSetPairs, Instruction *I); void collectLoadMoveSet(BasicBlock &BB, std::vector &PairableInsts, DenseMap &ChosenPairs, - std::multimap &LoadMoveSet, + DenseMap > &LoadMoveSet, DenseSet &LoadMoveSetPairs); bool canMoveUsesOfIAfterJ(BasicBlock &BB, @@ -695,7 +691,8 @@ namespace { DenseMap AllChosenPairs; DenseSet AllFixedOrderPairs; DenseMap AllPairConnectionTypes; - std::multimap AllConnectedPairs, AllConnectedPairDeps; + DenseMap > AllConnectedPairs, + AllConnectedPairDeps; do { std::vector PairableInsts; @@ -725,17 +722,19 @@ namespace { // Note that it only matters that both members of the second pair use some // element of the first pair (to allow for splatting). 
- std::multimap ConnectedPairs, ConnectedPairDeps; + DenseMap > ConnectedPairs, + ConnectedPairDeps; DenseMap PairConnectionTypes; computeConnectedPairs(CandidatePairs, CandidatePairsSet, PairableInsts, ConnectedPairs, PairConnectionTypes); if (ConnectedPairs.empty()) continue; - for (std::multimap::iterator + for (DenseMap >::iterator I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) { - ConnectedPairDeps.insert(VPPair(I->second, I->first)); - } + I != IE; ++I) + for (std::vector::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + ConnectedPairDeps[*J].push_back(I->first); // Build the pairable-instruction dependency map DenseSet PairableInstUsers; @@ -783,14 +782,15 @@ namespace { } } - for (std::multimap::iterator + for (DenseMap >::iterator I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) { - if (AllPairConnectionTypes.count(*I)) { - AllConnectedPairs.insert(*I); - AllConnectedPairDeps.insert(VPPair(I->second, I->first)); - } - } + I != IE; ++I) + for (std::vector::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + if (AllPairConnectionTypes.count(VPPair(I->first, *J))) { + AllConnectedPairs[I->first].push_back(*J); + AllConnectedPairDeps[*J].push_back(I->first); + } } while (ShouldContinue); if (AllChosenPairs.empty()) return false; @@ -1107,7 +1107,7 @@ namespace { // to contain any memory locations to which J writes. The function returns // true if J uses I. By default, alias analysis is used to determine // whether J reads from memory that overlaps with a location in WriteSet. - // If LoadMoveSet is not null, then it is a previously-computed multimap + // If LoadMoveSet is not null, then it is a previously-computed map // where the key is the memory-based user instruction and the value is // the instruction to be compared with I. So, if LoadMoveSet is provided, // then the alias analysis is not used. This is necessary because this @@ -1253,12 +1253,12 @@ namespace { // it looks for pairs such that both members have an input which is an // output of PI or PJ. 
void BBVectorize::computePairsConnectedTo( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseMap &PairConnectionTypes, - ValuePair P) { + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseMap &PairConnectionTypes, + ValuePair P) { StoreInst *SI, *SJ; // For each possible pairing for this variable, look at the uses of @@ -1287,14 +1287,14 @@ namespace { // Look for : if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); } // Look for : if (CandidatePairsSet.count(ValuePair(*J, *I))) { VPPair VP(P, ValuePair(*J, *I)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); } } @@ -1309,7 +1309,7 @@ namespace { if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); } } @@ -1333,7 +1333,7 @@ namespace { if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); } } @@ -1344,11 +1344,11 @@ namespace { // connected if some output of the first pair forms an input to both members // of the second pair. void BBVectorize::computeConnectedPairs( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseMap &PairConnectionTypes) { + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseMap &PairConnectionTypes) { for (std::vector::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { DenseMap >::iterator PP = @@ -1363,7 +1363,11 @@ namespace { PairConnectionTypes, ValuePair(*PI, *P)); } - DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() + DEBUG(size_t TotalPairs = 0; + for (DenseMap >::iterator I = + ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I) + TotalPairs += I->second.size(); + dbgs() << "BBV: found " << TotalPairs << " pair connections.\n"); } @@ -1414,9 +1418,9 @@ namespace { // input of pair Q is an output of pair P. If this is the case, then these // two pairs cannot be simultaneously fused. bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, - DenseSet &PairableInstUsers, - std::multimap *PairableInstUserMap, - DenseSet *PairableInstUserPairSet) { + DenseSet &PairableInstUsers, + DenseMap > *PairableInstUserMap, + DenseSet *PairableInstUserPairSet) { // Two pairs are in conflict if they are mutual Users of eachother. bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || PairableInstUsers.count(ValuePair(P.first, Q.second)) || @@ -1429,15 +1433,14 @@ namespace { if (PairableInstUserMap) { // FIXME: The expensive part of the cycle check is not so much the cycle // check itself but this edge insertion procedure. This needs some - // profiling and probably a different data structure (same is true of - // most uses of std::multimap). + // profiling and probably a different data structure. 
if (PUsesQ) { if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) - PairableInstUserMap->insert(VPPair(Q, P)); + (*PairableInstUserMap)[Q].push_back(P); } if (QUsesP) { if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) - PairableInstUserMap->insert(VPPair(P, Q)); + (*PairableInstUserMap)[P].push_back(Q); } } @@ -1447,8 +1450,8 @@ namespace { // This function walks the use graph of current pairs to see if, starting // from P, the walk returns to P. bool BBVectorize::pairWillFormCycle(ValuePair P, - std::multimap &PairableInstUserMap, - DenseSet &CurrentPairs) { + DenseMap > &PairableInstUserMap, + DenseSet &CurrentPairs) { DEBUG(if (DebugCycleCheck) dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " << *P.second << "\n"); @@ -1465,18 +1468,22 @@ namespace { DEBUG(if (DebugCycleCheck) dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " << *QTop.second << "\n"); - VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); - for (std::multimap::iterator C = QPairRange.first; - C != QPairRange.second; ++C) { - if (C->second == P) { + DenseMap >::iterator QQ = + PairableInstUserMap.find(QTop); + if (QQ == PairableInstUserMap.end()) + continue; + + for (std::vector::iterator C = QQ->second.begin(), + CE = QQ->second.end(); C != CE; ++C) { + if (*C == P) { DEBUG(dbgs() << "BBV: rejected to prevent non-trivial cycle formation: " - << *C->first.first << " <-> " << *C->first.second << "\n"); + << QTop.first << " <-> " << C->second << "\n"); return true; } - if (CurrentPairs.count(C->second) && !Visited.count(C->second)) - Q.push_back(C->second); + if (CurrentPairs.count(*C) && !Visited.count(*C)) + Q.push_back(*C); } } while (!Q.empty()); @@ -1486,13 +1493,13 @@ namespace { // This function builds the initial tree of connected pairs with the // pair J at the root. void BBVectorize::buildInitialTreeFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseSet &PairableInstUsers, - DenseMap &ChosenPairs, - DenseMap &Tree, ValuePair J) { + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseSet &PairableInstUsers, + DenseMap &ChosenPairs, + DenseMap &Tree, ValuePair J) { // Each of these pairs is viewed as the root node of a Tree. The Tree // is then walked (depth-first). As this happens, we keep track of // the pairs that compose the Tree and the maximum depth of the Tree. 
@@ -1505,21 +1512,23 @@ namespace { // Push each child onto the queue: bool MoreChildren = false; size_t MaxChildDepth = QTop.second; - VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first); - for (std::multimap::iterator k = qtRange.first; - k != qtRange.second; ++k) { - // Make sure that this child pair is still a candidate: - if (CandidatePairsSet.count(ValuePair(k->second))) { - DenseMap::iterator C = Tree.find(k->second); - if (C == Tree.end()) { - size_t d = getDepthFactor(k->second.first); - Q.push_back(ValuePairWithDepth(k->second, QTop.second+d)); - MoreChildren = true; - } else { - MaxChildDepth = std::max(MaxChildDepth, C->second); + DenseMap >::iterator QQ = + ConnectedPairs.find(QTop.first); + if (QQ != ConnectedPairs.end()) + for (std::vector::iterator k = QQ->second.begin(), + ke = QQ->second.end(); k != ke; ++k) { + // Make sure that this child pair is still a candidate: + if (CandidatePairsSet.count(*k)) { + DenseMap::iterator C = Tree.find(*k); + if (C == Tree.end()) { + size_t d = getDepthFactor(k->first); + Q.push_back(ValuePairWithDepth(*k, QTop.second+d)); + MoreChildren = true; + } else { + MaxChildDepth = std::max(MaxChildDepth, C->second); + } } } - } if (!MoreChildren) { // Record the current pair as part of the Tree: @@ -1532,16 +1541,16 @@ namespace { // Given some initial tree, prune it by removing conflicting pairs (pairs // that cannot be simultaneously chosen for vectorization). void BBVectorize::pruneTreeFor( - DenseMap > &CandidatePairs, - std::vector &PairableInsts, - std::multimap &ConnectedPairs, - DenseSet &PairableInstUsers, - std::multimap &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseMap &Tree, - DenseSet &PrunedTree, ValuePair J, - bool UseCycleCheck) { + DenseMap > &CandidatePairs, + std::vector &PairableInsts, + DenseMap > &ConnectedPairs, + DenseSet &PairableInstUsers, + DenseMap > &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, + DenseMap &ChosenPairs, + DenseMap &Tree, + DenseSet &PrunedTree, ValuePair J, + bool UseCycleCheck) { SmallVector Q; // General depth-first post-order traversal: Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); @@ -1551,10 +1560,14 @@ namespace { // Visit each child, pruning as necessary... SmallVector BestChildren; - VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first); - for (std::multimap::iterator K = QTopRange.first; - K != QTopRange.second; ++K) { - DenseMap::iterator C = Tree.find(K->second); + DenseMap >::iterator QQ = + ConnectedPairs.find(QTop.first); + if (QQ == ConnectedPairs.end()) + continue; + + for (std::vector::iterator K = QQ->second.begin(), + KE = QQ->second.end(); K != KE; ++K) { + DenseMap::iterator C = Tree.find(*K); if (C == Tree.end()) continue; // This child is in the Tree, now we need to make sure it is the @@ -1698,21 +1711,21 @@ namespace { // This function finds the best tree of mututally-compatible connected // pairs, given the choice of root pairs as an iterator range. 
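// (Illustrative aside, not part of the patch: with this change the roots
// tried by findBestTreeFor are no longer an iterator range but a fixed
// first element II plus its vector of candidate partners JJ; for each
// still-viable pair (II, *J) the function seeds a tree, prunes conflicting
// pairs, and keeps whichever pruned tree has the best effective size.)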
void BBVectorize::findBestTreeFor( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - std::multimap &ConnectedPairs, - std::multimap &ConnectedPairDeps, - DenseSet &PairableInstUsers, - std::multimap &PairableInstUserMap, - DenseSet &PairableInstUserPairSet, - DenseMap &ChosenPairs, - DenseSet &BestTree, size_t &BestMaxDepth, - int &BestEffSize, Value *II, std::vector&JJ, - bool UseCycleCheck) { + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + DenseMap &CandidatePairCostSavings, + std::vector &PairableInsts, + DenseSet &FixedOrderPairs, + DenseMap &PairConnectionTypes, + DenseMap > &ConnectedPairs, + DenseMap > &ConnectedPairDeps, + DenseSet &PairableInstUsers, + DenseMap > &PairableInstUserMap, + DenseSet &PairableInstUserPairSet, + DenseMap &ChosenPairs, + DenseSet &BestTree, size_t &BestMaxDepth, + int &BestEffSize, Value *II, std::vector&JJ, + bool UseCycleCheck) { for (std::vector::iterator J = JJ.begin(), JE = JJ.end(); J != JE; ++J) { ValuePair IJ(II, *J); @@ -1805,15 +1818,17 @@ namespace { // The edge weights contribute in a negative sense: they represent // the cost of shuffles. - VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S); - if (IP.first != ConnectedPairDeps.end()) { + DenseMap >::iterator SS = + ConnectedPairDeps.find(*S); + if (SS != ConnectedPairDeps.end()) { unsigned NumDepsDirect = 0, NumDepsSwap = 0; - for (std::multimap::iterator Q = IP.first; - Q != IP.second; ++Q) { - if (!PrunedTree.count(Q->second)) + for (std::vector::iterator T = SS->second.begin(), + TE = SS->second.end(); T != TE; ++T) { + VPPair Q(*S, *T); + if (!PrunedTree.count(Q.second)) continue; DenseMap::iterator R = - PairConnectionTypes.find(VPPair(Q->second, Q->first)); + PairConnectionTypes.find(VPPair(Q.second, Q.first)); assert(R != PairConnectionTypes.end() && "Cannot find pair connection type"); if (R->second == PairConnectionDirect) @@ -1829,16 +1844,17 @@ namespace { ((NumDepsSwap > NumDepsDirect) || FixedOrderPairs.count(ValuePair(S->second, S->first))); - for (std::multimap::iterator Q = IP.first; - Q != IP.second; ++Q) { - if (!PrunedTree.count(Q->second)) + for (std::vector::iterator T = SS->second.begin(), + TE = SS->second.end(); T != TE; ++T) { + VPPair Q(*S, *T); + if (!PrunedTree.count(Q.second)) continue; DenseMap::iterator R = - PairConnectionTypes.find(VPPair(Q->second, Q->first)); + PairConnectionTypes.find(VPPair(Q.second, Q.first)); assert(R != PairConnectionTypes.end() && "Cannot find pair connection type"); - Type *Ty1 = Q->second.first->getType(), - *Ty2 = Q->second.second->getType(); + Type *Ty1 = Q.second.first->getType(), + *Ty2 = Q.second.second->getType(); Type *VTy = getVecTypeForPair(Ty1, Ty2); if ((R->second == PairConnectionDirect && FlipOrder) || (R->second == PairConnectionSwap && !FlipOrder) || @@ -1856,7 +1872,7 @@ namespace { } DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << - *Q->second.first << " <-> " << *Q->second.second << + *Q.second.first << " <-> " << *Q.second.second << "} -> {" << *S->first << " <-> " << *S->second << "} = " << ESContrib << "\n"); @@ -2079,16 +2095,16 @@ namespace { // Given the list of candidate pairs, this function selects those // that will be fused into vector instructions. 
void BBVectorize::choosePairs( - DenseMap > &CandidatePairs, - DenseSet &CandidatePairsSet, - DenseMap &CandidatePairCostSavings, - std::vector &PairableInsts, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - std::multimap &ConnectedPairs, - std::multimap &ConnectedPairDeps, - DenseSet &PairableInstUsers, - DenseMap& ChosenPairs) { + DenseMap > &CandidatePairs, + DenseSet &CandidatePairsSet, + DenseMap &CandidatePairCostSavings, + std::vector &PairableInsts, + DenseSet &FixedOrderPairs, + DenseMap &PairConnectionTypes, + DenseMap > &ConnectedPairs, + DenseMap > &ConnectedPairDeps, + DenseSet &PairableInstUsers, + DenseMap& ChosenPairs) { bool UseCycleCheck = CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck; @@ -2100,7 +2116,7 @@ namespace { JJ.push_back(I->first); } - std::multimap PairableInstUserMap; + DenseMap > PairableInstUserMap; DenseSet PairableInstUserPairSet; for (std::vector::iterator I = PairableInsts.begin(), E = PairableInsts.end(); I != E; ++I) { @@ -2819,7 +2835,7 @@ namespace { // to be moved after J (the second instruction) when the pair is fused. void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, DenseMap &ChosenPairs, - std::multimap &LoadMoveSet, + DenseMap > &LoadMoveSet, DenseSet &LoadMoveSetPairs, Instruction *I) { // Skip to the first instruction past I. @@ -2834,7 +2850,7 @@ namespace { for (BasicBlock::iterator E = BB.end(); cast(L) != E; ++L) { if (trackUsesOfI(Users, WriteSet, I, L)) { if (L->mayReadFromMemory()) { - LoadMoveSet.insert(ValuePair(L, I)); + LoadMoveSet[L].push_back(I); LoadMoveSetPairs.insert(ValuePair(L, I)); } } @@ -2851,7 +2867,7 @@ namespace { void BBVectorize::collectLoadMoveSet(BasicBlock &BB, std::vector &PairableInsts, DenseMap &ChosenPairs, - std::multimap &LoadMoveSet, + DenseMap > &LoadMoveSet, DenseSet &LoadMoveSetPairs) { for (std::vector::iterator PI = PairableInsts.begin(), PIE = PairableInsts.end(); PI != PIE; ++PI) { @@ -2896,12 +2912,12 @@ namespace { // because the vector instruction is inserted in the location of the pair's // second member). void BBVectorize::fuseChosenPairs(BasicBlock &BB, - std::vector &PairableInsts, - DenseMap &ChosenPairs, - DenseSet &FixedOrderPairs, - DenseMap &PairConnectionTypes, - std::multimap &ConnectedPairs, - std::multimap &ConnectedPairDeps) { + std::vector &PairableInsts, + DenseMap &ChosenPairs, + DenseSet &FixedOrderPairs, + DenseMap &PairConnectionTypes, + DenseMap > &ConnectedPairs, + DenseMap > &ConnectedPairDeps) { LLVMContext& Context = BB.getContext(); // During the vectorization process, the order of the pairs to be fused @@ -2915,7 +2931,7 @@ namespace { E = FlippedPairs.end(); P != E; ++P) ChosenPairs.insert(*P); - std::multimap LoadMoveSet; + DenseMap > LoadMoveSet; DenseSet LoadMoveSetPairs; collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet, LoadMoveSetPairs); @@ -2967,18 +2983,20 @@ namespace { // of dependencies connected via swaps, and those directly connected, // and flip the order if the number of swaps is greater. 
bool OrigOrder = true; - VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J)); - if (IP.first == ConnectedPairDeps.end()) { - IP = ConnectedPairDeps.equal_range(ValuePair(J, I)); + DenseMap >::iterator IJ = + ConnectedPairDeps.find(ValuePair(I, J)); + if (IJ == ConnectedPairDeps.end()) { + IJ = ConnectedPairDeps.find(ValuePair(J, I)); OrigOrder = false; } - if (IP.first != ConnectedPairDeps.end()) { + if (IJ != ConnectedPairDeps.end()) { unsigned NumDepsDirect = 0, NumDepsSwap = 0; - for (std::multimap::iterator Q = IP.first; - Q != IP.second; ++Q) { + for (std::vector::iterator T = IJ->second.begin(), + TE = IJ->second.end(); T != TE; ++T) { + VPPair Q(IJ->first, *T); DenseMap::iterator R = - PairConnectionTypes.find(VPPair(Q->second, Q->first)); + PairConnectionTypes.find(VPPair(Q.second, Q.first)); assert(R != PairConnectionTypes.end() && "Cannot find pair connection type"); if (R->second == PairConnectionDirect) @@ -3004,17 +3022,20 @@ namespace { // If the pair being fused uses the opposite order from that in the pair // connection map, then we need to flip the types. - VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L)); - for (std::multimap::iterator Q = IP.first; - Q != IP.second; ++Q) { - DenseMap::iterator R = PairConnectionTypes.find(*Q); - assert(R != PairConnectionTypes.end() && - "Cannot find pair connection type"); - if (R->second == PairConnectionDirect) - R->second = PairConnectionSwap; - else if (R->second == PairConnectionSwap) - R->second = PairConnectionDirect; - } + DenseMap >::iterator HL = + ConnectedPairs.find(ValuePair(H, L)); + if (HL != ConnectedPairs.end()) + for (std::vector::iterator T = HL->second.begin(), + TE = HL->second.end(); T != TE; ++T) { + VPPair Q(HL->first, *T); + DenseMap::iterator R = PairConnectionTypes.find(Q); + assert(R != PairConnectionTypes.end() && + "Cannot find pair connection type"); + if (R->second == PairConnectionDirect) + R->second = PairConnectionSwap; + else if (R->second == PairConnectionSwap) + R->second = PairConnectionDirect; + } bool LBeforeH = !FlipPairOrder; unsigned NumOperands = I->getNumOperands(); @@ -3068,17 +3089,21 @@ namespace { // yet-to-be-fused pair. The loads in question are the keys of the map. if (I->mayReadFromMemory()) { std::vector NewSetMembers; - VPIteratorPair IPairRange = LoadMoveSet.equal_range(I); - VPIteratorPair JPairRange = LoadMoveSet.equal_range(J); - for (std::multimap::iterator N = IPairRange.first; - N != IPairRange.second; ++N) - NewSetMembers.push_back(ValuePair(K, N->second)); - for (std::multimap::iterator N = JPairRange.first; - N != JPairRange.second; ++N) - NewSetMembers.push_back(ValuePair(K, N->second)); + DenseMap >::iterator II = + LoadMoveSet.find(I); + if (II != LoadMoveSet.end()) + for (std::vector::iterator N = II->second.begin(), + NE = II->second.end(); N != NE; ++N) + NewSetMembers.push_back(ValuePair(K, *N)); + DenseMap >::iterator JJ = + LoadMoveSet.find(J); + if (JJ != LoadMoveSet.end()) + for (std::vector::iterator N = JJ->second.begin(), + NE = JJ->second.end(); N != NE; ++N) + NewSetMembers.push_back(ValuePair(K, *N)); for (std::vector::iterator A = NewSetMembers.begin(), AE = NewSetMembers.end(); A != AE; ++A) { - LoadMoveSet.insert(*A); + LoadMoveSet[A->first].push_back(A->second); LoadMoveSetPairs.insert(*A); } } -- cgit v1.1 From eba97c573f08332c9c9d1875c304cce1bea2e28e Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 14 Feb 2013 23:11:24 +0000 Subject: [mips] Simplify code in function Filler::findDelayInstr. 1. 
Define and use function terminateSearch. 2. Use MachineBasicBlock::iterator instead of MachineBasicBlock::instr_iterator. 3. Delete the line which checks whether an instruction is a pseudo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDelaySlotFiller.cpp | 67 ++++++++++++++------------------- 1 file changed, 29 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index cf0d9db..672d4ea 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -66,37 +66,38 @@ namespace { } private: - typedef MachineBasicBlock::instr_iterator InstrIter; - typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter; + typedef MachineBasicBlock::iterator Iter; + typedef MachineBasicBlock::reverse_iterator ReverseIter; bool runOnMachineBasicBlock(MachineBasicBlock &MBB); bool isDelayFiller(MachineBasicBlock &MBB, - InstrIter candidate); + Iter candidate); - void insertCallUses(InstrIter MI, + void insertCallUses(Iter MI, SmallSet &RegDefs, SmallSet &RegUses); - void insertDefsUses(InstrIter MI, + void insertDefsUses(Iter MI, SmallSet &RegDefs, SmallSet &RegUses); bool IsRegInSet(SmallSet &RegSet, unsigned Reg); - bool delayHasHazard(InstrIter candidate, + bool delayHasHazard(Iter candidate, bool &sawLoad, bool &sawStore, SmallSet &RegDefs, SmallSet &RegUses); bool - findDelayInstr(MachineBasicBlock &MBB, InstrIter slot, - InstrIter &Filler); + findDelayInstr(MachineBasicBlock &MBB, Iter slot, + Iter &Filler); + + bool terminateSearch(const MachineInstr &Candidate) const; TargetMachine &TM; const TargetInstrInfo *TII; - InstrIter LastFiller; static char ID; }; @@ -108,16 +109,14 @@ namespace { bool Filler:: runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - LastFiller = MBB.instr_end(); - for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I) { + for (Iter I = MBB.begin(); I != MBB.end(); ++I) { if (!I->hasDelaySlot()) continue; ++FilledSlots; Changed = true; - InstrIter InstrWithSlot = I; - InstrIter D; + Iter D; // Delay slot filling is disabled at -O0. if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && @@ -127,13 +126,8 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { } else BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); - // Record the filler instruction that filled the delay slot. - // The instruction after it will be visited in the next iteration. - LastFiller = ++I; - - // Bundle the delay slot filler to InstrWithSlot so that the machine - // verifier doesn't expect this instruction to be a terminator. - MIBundleBuilder(MBB, InstrWithSlot, llvm::next(LastFiller)); + // Bundle the delay slot filler to the instruction with the delay slot. + MIBundleBuilder(MBB, I, llvm::next(llvm::next(I))); } return Changed; @@ -146,8 +140,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { } bool Filler::findDelayInstr(MachineBasicBlock &MBB, - InstrIter slot, - InstrIter &Filler) { + Iter slot, + Iter &Filler) { SmallSet RegDefs; SmallSet RegUses; @@ -156,26 +150,17 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, bool sawLoad = false; bool sawStore = false; - for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) { + for (ReverseIter I(slot); I != MBB.rend(); ++I) { // skip debug value if (I->isDebugValue()) continue; - // Convert to forward iterator. 
- InstrIter FI(llvm::next(I).base());
-
- if (I->hasUnmodeledSideEffects()
- || I->isInlineAsm()
- || I->isLabel()
- || FI == LastFiller
- || I->isPseudo()
- //
- // Should not allow:
- // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
- // list. TBD.
- )
+ if (terminateSearch(*I))
break;
+ // Convert to forward iterator.
+ Iter FI(llvm::next(I).base());
+
if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
insertDefsUses(FI, RegDefs, RegUses);
continue;
@@ -188,7 +173,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
return false;
}
-bool Filler::delayHasHazard(InstrIter candidate,
+bool Filler::delayHasHazard(Iter candidate,
bool &sawLoad, bool &sawStore,
SmallSet &RegDefs,
SmallSet &RegUses) {
@@ -253,7 +238,7 @@ static void insertDefUse(const MachineOperand &MO,
}
// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(InstrIter MI,
+void Filler::insertDefsUses(Iter MI,
SmallSet &RegDefs,
SmallSet &RegUses) {
unsigned I, E = MI->getDesc().getNumOperands();
@@ -288,3 +273,9 @@ bool Filler::IsRegInSet(SmallSet &RegSet, unsigned Reg) {
return true;
return false;
}
+
+bool Filler::terminateSearch(const MachineInstr &Candidate) const {
+ return (Candidate.isTerminator() || Candidate.isCall() ||
+ Candidate.isLabel() || Candidate.isInlineAsm() ||
+ Candidate.hasUnmodeledSideEffects());
+}
-- cgit v1.1


From d6f19c716378bce0acc3cbfc9dc9297468f046a0 Mon Sep 17 00:00:00 2001
From: Eli Bendersky
Date: Thu, 14 Feb 2013 23:17:03 +0000
Subject: The operand listing is very much outdated.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175220 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86ISelLowering.h | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 6758ed1..958ceb0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -234,11 +234,8 @@ namespace llvm {
// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
+ /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// the list of operands.
TC_RETURN,
// VZEXT_MOVL - Vector move low and zero extend.
-- cgit v1.1


From 48e841d41c57712f4d6a94b1123f198bdf0bda7d Mon Sep 17 00:00:00 2001
From: Joel Jones
Date: Thu, 14 Feb 2013 23:18:40 +0000
Subject: The ARM NEON vector compare instructions take three arguments. However,
 the assembler should also accept a two arg form, as the documentation
 specifies that the first (destination) register is optional. This patch uses
 TwoOperandAliasConstraint to add the two argument form.
It also fixes an 80-column formatting problem in: test/MC/ARM/neon-bitwise-encoding Clang rejects ARM NEON assembly instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175221 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 697a8d2..901ff64 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>; @@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$Vd, $Vm, #0", NEONvclez>; +} // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>; +} // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", -- cgit v1.1 From 90db35a3e7d24ad81aa0ce6b641186faed033cdc Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 14 Feb 2013 23:20:15 +0000 Subject: [mips] Fix comments and coding style violations. Declare functions to be const. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175222 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDelaySlotFiller.cpp | 111 ++++++++++++++------------------ 1 file changed, 47 insertions(+), 64 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 672d4ea..6ddb39b 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -1,4 +1,4 @@ -//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===// +//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with useful instructions. +// Simple pass to fill delay slots with useful instructions. 
// //===----------------------------------------------------------------------===// @@ -33,8 +33,7 @@ STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" static cl::opt DisableDelaySlotFiller( "disable-mips-delay-filler", cl::init(false), - cl::desc("Disable the delay slot filler, which attempts to fill the Mips" - "delay slots with useful instructions."), + cl::desc("Fill all delay slots with NOPs."), cl::Hidden); // This option can be used to silence complaints by machine verifier passes. @@ -71,28 +70,16 @@ namespace { bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - bool isDelayFiller(MachineBasicBlock &MBB, - Iter candidate); + void insertDefsUses(const MachineInstr &MI, SmallSet &RegDefs, + SmallSet &RegUses) const; - void insertCallUses(Iter MI, - SmallSet &RegDefs, - SmallSet &RegUses); + bool isRegInSet(const SmallSet &RegSet, unsigned Reg) const; - void insertDefsUses(Iter MI, - SmallSet &RegDefs, - SmallSet &RegUses); + bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, + bool &SawStore, const SmallSet &RegDefs, + const SmallSet &RegUses) const; - bool IsRegInSet(SmallSet &RegSet, - unsigned Reg); - - bool delayHasHazard(Iter candidate, - bool &sawLoad, bool &sawStore, - SmallSet &RegDefs, - SmallSet &RegUses); - - bool - findDelayInstr(MachineBasicBlock &MBB, Iter slot, - Iter &Filler); + bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const; bool terminateSearch(const MachineInstr &Candidate) const; @@ -106,8 +93,7 @@ namespace { /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. -bool Filler:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) { +bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (Iter I = MBB.begin(); I != MBB.end(); ++I) { @@ -139,18 +125,17 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { return new Filler(tm); } -bool Filler::findDelayInstr(MachineBasicBlock &MBB, - Iter slot, - Iter &Filler) { +bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, + Iter &Filler) const { SmallSet RegDefs; SmallSet RegUses; - insertDefsUses(slot, RegDefs, RegUses); + insertDefsUses(*Slot, RegDefs, RegUses); - bool sawLoad = false; - bool sawStore = false; + bool SawLoad = false; + bool SawStore = false; - for (ReverseIter I(slot); I != MBB.rend(); ++I) { + for (ReverseIter I(Slot); I != MBB.rend(); ++I) { // skip debug value if (I->isDebugValue()) continue; @@ -158,49 +143,46 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, if (terminateSearch(*I)) break; - // Convert to forward iterator. - Iter FI(llvm::next(I).base()); - - if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) { - insertDefsUses(FI, RegDefs, RegUses); + if (delayHasHazard(*I, SawLoad, SawStore, RegDefs, RegUses)) { + insertDefsUses(*I, RegDefs, RegUses); continue; } - Filler = FI; + Filler = llvm::next(I).base(); return true; } return false; } -bool Filler::delayHasHazard(Iter candidate, - bool &sawLoad, bool &sawStore, - SmallSet &RegDefs, - SmallSet &RegUses) { - if (candidate->isImplicitDef() || candidate->isKill()) +bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, + bool &SawStore, + const SmallSet &RegDefs, + const SmallSet &RegUses) const { + if (Candidate.isImplicitDef() || Candidate.isKill()) return true; // Loads or stores cannot be moved past a store to the delay slot // and stores cannot be moved past a load. 
- if (candidate->mayLoad()) { - if (sawStore) + if (Candidate.mayLoad()) { + if (SawStore) return true; - sawLoad = true; + SawLoad = true; } - if (candidate->mayStore()) { - if (sawStore) + if (Candidate.mayStore()) { + if (SawStore) return true; - sawStore = true; - if (sawLoad) + SawStore = true; + if (SawLoad) return true; } - assert((!candidate->isCall() && !candidate->isReturn()) && + assert((!Candidate.isCall() && !Candidate.isReturn()) && "Cannot put calls or returns in delay slot."); - for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { - const MachineOperand &MO = candidate->getOperand(i); + for (unsigned I = 0, E = Candidate.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Candidate.getOperand(I); unsigned Reg; if (!MO.isReg() || !(Reg = MO.getReg())) @@ -208,12 +190,12 @@ bool Filler::delayHasHazard(Iter candidate, if (MO.isDef()) { // check whether Reg is defined or used before delay slot. - if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) + if (isRegInSet(RegDefs, Reg) || isRegInSet(RegUses, Reg)) return true; } if (MO.isUse()) { // check whether Reg is defined before delay slot. - if (IsRegInSet(RegDefs, Reg)) + if (isRegInSet(RegDefs, Reg)) return true; } } @@ -238,34 +220,35 @@ static void insertDefUse(const MachineOperand &MO, } // Insert Defs and Uses of MI into the sets RegDefs and RegUses. -void Filler::insertDefsUses(Iter MI, +void Filler::insertDefsUses(const MachineInstr &MI, SmallSet &RegDefs, - SmallSet &RegUses) { - unsigned I, E = MI->getDesc().getNumOperands(); + SmallSet &RegUses) const { + unsigned I, E = MI.getDesc().getNumOperands(); for (I = 0; I != E; ++I) - insertDefUse(MI->getOperand(I), RegDefs, RegUses); + insertDefUse(MI.getOperand(I), RegDefs, RegUses); // If MI is a call, add RA to RegDefs to prevent users of RA from going into // delay slot. - if (MI->isCall()) { + if (MI.isCall()) { RegDefs.insert(Mips::RA); return; } // Return if MI is a return. - if (MI->isReturn()) + if (MI.isReturn()) return; // Examine the implicit operands. Exclude register AT which is in the list of // clobbered registers of branch instructions. - E = MI->getNumOperands(); + E = MI.getNumOperands(); for (; I != E; ++I) - insertDefUse(MI->getOperand(I), RegDefs, RegUses, Mips::AT); + insertDefUse(MI.getOperand(I), RegDefs, RegUses, Mips::AT); } //returns true if the Reg or its alias is in the RegSet. -bool Filler::IsRegInSet(SmallSet &RegSet, unsigned Reg) { +bool Filler::isRegInSet(const SmallSet &RegSet, + unsigned Reg) const { // Check Reg and all aliased Registers. for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); AI.isValid(); ++AI) -- cgit v1.1 From cd7319dc5f91ac81ab9d8505f34937e91bfcf65d Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 14 Feb 2013 23:40:57 +0000 Subject: [mips] Replace usage of SmallSet with BitVector, which is used to keep track of defined and used registers. Also add a few helper functions to simplify the code. 
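Physical-register numbers are small dense integers, so the def/use sets
shrink from hash sets to fixed-width bit masks: two BitVectors sized by the
target's register count, with insert/count becoming set()/test(). A rough
standalone sketch of the idiom -- the helper is illustrative, and note that
the pass still resolves register aliases separately, via the
MCRegAliasIterator walk retained in isRegInSet:

#include "llvm/ADT/BitVector.h"

// Record one register operand and report whether it clashes with what was
// already seen: a def clashes with any earlier def or use, a use only with
// an earlier def.
static bool recordOperand(llvm::BitVector &RegDefs, llvm::BitVector &RegUses,
                          unsigned Reg, bool IsDef) {
  if (IsDef) {
    bool Clash = RegDefs.test(Reg) || RegUses.test(Reg);
    RegDefs.set(Reg);
    return Clash;
  }
  bool Clash = RegDefs.test(Reg);
  RegUses.set(Reg);
  return Clash;
}

// Typical setup, mirroring findDelayInstr in the diff below:
//   unsigned NumRegs = TM.getRegisterInfo()->getNumRegs();
//   llvm::BitVector RegDefs(NumRegs), RegUses(NumRegs);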
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175224 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDelaySlotFiller.cpp | 169 ++++++++++++++++---------------- 1 file changed, 83 insertions(+), 86 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 6ddb39b..b56d9cd 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -15,7 +15,7 @@ #include "Mips.h" #include "MipsTargetMachine.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -70,14 +70,26 @@ namespace { bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - void insertDefsUses(const MachineInstr &MI, SmallSet &RegDefs, - SmallSet &RegUses) const; + /// Initialize RegDefs and RegUses. + void initRegDefsUses(const MachineInstr &MI, BitVector &RegDefs, + BitVector &RegUses) const; - bool isRegInSet(const SmallSet &RegSet, unsigned Reg) const; + bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; + bool checkRegDefsUses(const BitVector &RegDefs, const BitVector &RegUses, + BitVector &NewDefs, BitVector &NewUses, + unsigned Reg, bool IsDef) const; + + bool checkRegDefsUses(BitVector &RegDefs, BitVector &RegUses, + const MachineInstr &MI, unsigned Begin, + unsigned End) const; + + /// This function checks if it is valid to move Candidate to the delay slot + /// and returns true if it isn't. It also updates load and store flags and + /// register defs and uses. bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, - bool &SawStore, const SmallSet &RegDefs, - const SmallSet &RegUses) const; + bool &SawStore, BitVector &RegDefs, + BitVector &RegUses) const; bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const; @@ -127,10 +139,10 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, Iter &Filler) const { - SmallSet RegDefs; - SmallSet RegUses; + unsigned NumRegs = TM.getRegisterInfo()->getNumRegs(); + BitVector RegDefs(NumRegs), RegUses(NumRegs); - insertDefsUses(*Slot, RegDefs, RegUses); + initRegDefsUses(*Slot, RegDefs, RegUses); bool SawLoad = false; bool SawStore = false; @@ -143,10 +155,8 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, if (terminateSearch(*I)) break; - if (delayHasHazard(*I, SawLoad, SawStore, RegDefs, RegUses)) { - insertDefsUses(*I, RegDefs, RegUses); + if (delayHasHazard(*I, SawLoad, SawStore, RegDefs, RegUses)) continue; - } Filler = llvm::next(I).base(); return true; @@ -155,104 +165,91 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, return false; } +bool Filler::checkRegDefsUses(const BitVector &RegDefs, + const BitVector &RegUses, + BitVector &NewDefs, BitVector &NewUses, + unsigned Reg, bool IsDef) const { + if (IsDef) { + NewDefs.set(Reg); + // check whether Reg has already been defined or used. + return (isRegInSet(RegDefs, Reg) || isRegInSet(RegUses, Reg)); + } + + NewUses.set(Reg); + // check whether Reg has already been defined. 
+ return isRegInSet(RegDefs, Reg); +} + +bool Filler::checkRegDefsUses(BitVector &RegDefs, BitVector &RegUses, + const MachineInstr &MI, unsigned Begin, + unsigned End) const { + unsigned NumRegs = TM.getRegisterInfo()->getNumRegs(); + BitVector NewDefs(NumRegs), NewUses(NumRegs); + bool HasHazard = false; + + for (unsigned I = Begin; I != End; ++I) { + const MachineOperand &MO = MI.getOperand(I); + + if (MO.isReg() && MO.getReg()) + HasHazard |= checkRegDefsUses(RegDefs, RegUses, NewDefs, NewUses, + MO.getReg(), MO.isDef()); + } + + RegDefs |= NewDefs; + RegUses |= NewUses; + + return HasHazard; +} + bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, - bool &SawStore, - const SmallSet &RegDefs, - const SmallSet &RegUses) const { - if (Candidate.isImplicitDef() || Candidate.isKill()) - return true; + bool &SawStore, BitVector &RegDefs, + BitVector &RegUses) const { + bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill()); // Loads or stores cannot be moved past a store to the delay slot // and stores cannot be moved past a load. - if (Candidate.mayLoad()) { - if (SawStore) - return true; - SawLoad = true; - } - if (Candidate.mayStore()) { - if (SawStore) - return true; + HasHazard |= SawStore | SawLoad; SawStore = true; - if (SawLoad) - return true; + } else if (Candidate.mayLoad()) { + HasHazard |= SawStore; + SawLoad = true; } assert((!Candidate.isCall() && !Candidate.isReturn()) && "Cannot put calls or returns in delay slot."); - for (unsigned I = 0, E = Candidate.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = Candidate.getOperand(I); - unsigned Reg; - - if (!MO.isReg() || !(Reg = MO.getReg())) - continue; // skip - - if (MO.isDef()) { - // check whether Reg is defined or used before delay slot. - if (isRegInSet(RegDefs, Reg) || isRegInSet(RegUses, Reg)) - return true; - } - if (MO.isUse()) { - // check whether Reg is defined before delay slot. - if (isRegInSet(RegDefs, Reg)) - return true; - } - } - return false; -} + HasHazard |= checkRegDefsUses(RegDefs, RegUses, Candidate, 0, + Candidate.getNumOperands()); -// Helper function for getting a MachineOperand's register number and adding it -// to RegDefs or RegUses. -static void insertDefUse(const MachineOperand &MO, - SmallSet &RegDefs, - SmallSet &RegUses, - unsigned ExcludedReg = 0) { - unsigned Reg; - - if (!MO.isReg() || !(Reg = MO.getReg()) || (Reg == ExcludedReg)) - return; - - if (MO.isDef()) - RegDefs.insert(Reg); - else if (MO.isUse()) - RegUses.insert(Reg); + return HasHazard; } -// Insert Defs and Uses of MI into the sets RegDefs and RegUses. -void Filler::insertDefsUses(const MachineInstr &MI, - SmallSet &RegDefs, - SmallSet &RegUses) const { - unsigned I, E = MI.getDesc().getNumOperands(); - - for (I = 0; I != E; ++I) - insertDefUse(MI.getOperand(I), RegDefs, RegUses); +void Filler::initRegDefsUses(const MachineInstr &MI, BitVector &RegDefs, + BitVector &RegUses) const { + // Add all register operands which are explicit and non-variadic. + checkRegDefsUses(RegDefs, RegUses, MI, 0, MI.getDesc().getNumOperands()); // If MI is a call, add RA to RegDefs to prevent users of RA from going into // delay slot. - if (MI.isCall()) { - RegDefs.insert(Mips::RA); - return; + if (MI.isCall()) + RegDefs.set(Mips::RA); + + // Add all implicit register operands of branch instructions except + // register AT. + if (MI.isBranch()) { + checkRegDefsUses(RegDefs, RegUses, MI, MI.getDesc().getNumOperands(), + MI.getNumOperands()); + RegDefs.reset(Mips::AT); } - - // Return if MI is a return. 
- if (MI.isReturn()) - return; - - // Examine the implicit operands. Exclude register AT which is in the list of - // clobbered registers of branch instructions. - E = MI.getNumOperands(); - for (; I != E; ++I) - insertDefUse(MI.getOperand(I), RegDefs, RegUses, Mips::AT); } //returns true if the Reg or its alias is in the RegSet. -bool Filler::isRegInSet(const SmallSet &RegSet, - unsigned Reg) const { +bool Filler::isRegInSet(const BitVector &RegSet, unsigned Reg) const { // Check Reg and all aliased Registers. for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); AI.isValid(); ++AI) - if (RegSet.count(*AI)) + if (RegSet.test(*AI)) return true; return false; } -- cgit v1.1 From d977aacf990d241d0224d20518f631a928c1b1a8 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 14 Feb 2013 23:54:40 +0000 Subject: [mips] Disallow moving load/store instructions past volatile instructions. Unfortunately, I wasn't able to create a test case that demonstrates the problem I was trying to fix with this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDelaySlotFiller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index b56d9cd..6b25d2d 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -208,7 +208,7 @@ bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, // Loads or stores cannot be moved past a store to the delay slot // and stores cannot be moved past a load. - if (Candidate.mayStore()) { + if (Candidate.mayStore() || Candidate.hasOrderedMemoryRef()) { HasHazard |= SawStore | SawLoad; SawStore = true; } else if (Candidate.mayLoad()) { -- cgit v1.1 From 0ec707a4e586f41f8a2cf91557fbbbe142377dd0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 00:55:08 +0000 Subject: Simplify the attributes '<' comparison function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175235 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 3de304e..629679c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -332,25 +332,23 @@ StringRef AttributeImpl::getValueAsString() const { bool AttributeImpl::operator<(const AttributeImpl &AI) const { // This sorts the attributes with Attribute::AttrKinds coming first (sorted // relative to their enum value) and then strings. 
- if (isEnumAttribute())
- if (AI.isAlignAttribute() || AI.isEnumAttribute())
- return getKindAsEnum() < AI.getKindAsEnum();
-
- if (isAlignAttribute()) {
- if (!AI.isStringAttribute() && getKindAsEnum() < AI.getKindAsEnum())
- return true;
- if (AI.isAlignAttribute())
- return getValueAsInt() < AI.getValueAsInt();
+ if (isEnumAttribute()) {
+ if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
+ if (AI.isAlignAttribute()) return true;
+ if (AI.isStringAttribute()) return true;
}
- if (isStringAttribute()) {
- if (!AI.isStringAttribute()) return false;
- if (getKindAsString() < AI.getKindAsString()) return true;
- if (getKindAsString() == AI.getKindAsString())
- return getValueAsString() < AI.getValueAsString();
+ if (isAlignAttribute()) {
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isAlignAttribute()) return getValueAsInt() < AI.getValueAsInt();
+ if (AI.isStringAttribute()) return true;
}
- return false;
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isAlignAttribute()) return false;
+ if (getKindAsString() == AI.getKindAsString())
+ return getValueAsString() < AI.getValueAsString();
+ return getKindAsString() < AI.getKindAsString();
}
uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { -- cgit v1.1 From 5cf38fd7633bee4a0ff627593cc1fd63ab0868d8 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 15 Feb 2013 01:04:38 +0000 Subject: Fix minor mips16 issues in directives for function prologue. This probably does not matter, but it makes the output more gcc-compatible, which avoids possible subtle problems. Also, turned a previously disabled check in helloworld.ll back on. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175237 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsAsmPrinter.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index e573e89..84bf48c 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -236,10 +236,11 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { raw_svector_ostream OS(Str); printSavedRegsBitmask(OS); OutStreamer.EmitRawText(OS.str()); -
- OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ }
}
}
@@ -250,9 +251,11 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // always be at the function end, and we can't emit and // break with BB logic. if (OutStreamer.hasRawTextSupport()) {
- OutStreamer.EmitRawText(StringRef("\t.set\tat"));
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ }
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); } } -- cgit v1.1 From a7e4409bb238ec3c5169c25afbb0308ae76d1111 Mon Sep 17 00:00:00 2001 From: Anna Zaks Date: Fri, 15 Feb 2013 04:15:55 +0000 Subject: Revert "Simplify the attributes '<' comparison function." This reverts commit 82c101153fe7b35bce48781fab038e1b8f31a7bd.
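For reference, the simplified comparison being reverted here (and re-applied below in r175252) encodes a three-bucket ordering: enum attributes sort first (by kind), then alignment attributes (by value), then string attributes (by key, then value). Below is a reduced standalone model of that ordering; the MockAttr type and all of its fields are invented for illustration.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct MockAttr {
  enum Class { Enum, Align, String };
  Class Cls;
  int Kind;             // used when Cls == Enum
  unsigned Value;       // used when Cls == Align
  std::string Key, Val; // used when Cls == String

  bool operator<(const MockAttr &AI) const {
    if (Cls == Enum) {
      if (AI.Cls == Enum) return Kind < AI.Kind;
      return true; // enums sort before aligns and strings
    }
    if (Cls == Align) {
      if (AI.Cls == Enum) return false;
      if (AI.Cls == Align) return Value < AI.Value;
      return true; // aligns sort before strings
    }
    if (AI.Cls != String) return false;
    if (Key == AI.Key) return Val < AI.Val;
    return Key < AI.Key;
  }
};

int main() {
  std::vector<MockAttr> Attrs = {
      {MockAttr::String, 0, 0, "target-cpu", "cortex-a9"},
      {MockAttr::Align, 0, 16, "", ""},
      {MockAttr::Enum, 3, 0, "", ""},
  };
  std::sort(Attrs.begin(), Attrs.end());
  for (const MockAttr &A : Attrs)
    std::cout << A.Cls << '\n'; // prints 0, 1, 2: Enum, Align, String
  return 0;
}

Because every attribute falls into exactly one bucket and ties within a bucket compare consistently, the relation is a strict weak ordering, which is what std::sort requires of an operator<.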
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 629679c..3de304e 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -332,23 +332,25 @@ StringRef AttributeImpl::getValueAsString() const { bool AttributeImpl::operator<(const AttributeImpl &AI) const { // This sorts the attributes with Attribute::AttrKinds coming first (sorted // relative to their enum value) and then strings.
- if (isEnumAttribute()) {
- if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
- if (AI.isAlignAttribute()) return true;
- if (AI.isStringAttribute()) return true;
- }
+ if (isEnumAttribute())
+ if (AI.isAlignAttribute() || AI.isEnumAttribute())
+ return getKindAsEnum() < AI.getKindAsEnum();
if (isAlignAttribute()) {
- if (AI.isEnumAttribute()) return false;
- if (AI.isAlignAttribute()) return getValueAsInt() < AI.getValueAsInt();
- if (AI.isStringAttribute()) return true;
+ if (!AI.isStringAttribute() && getKindAsEnum() < AI.getKindAsEnum())
+ return true;
+ if (AI.isAlignAttribute())
+ return getValueAsInt() < AI.getValueAsInt();
+ }
+
+ if (isStringAttribute()) {
+ if (!AI.isStringAttribute()) return false;
+ if (getKindAsString() < AI.getKindAsString()) return true;
+ if (getKindAsString() == AI.getKindAsString())
+ return getValueAsString() < AI.getValueAsString();
}
- if (AI.isEnumAttribute()) return false;
- if (AI.isAlignAttribute()) return false;
- if (getKindAsString() == AI.getKindAsString())
- return getValueAsString() < AI.getValueAsString();
- return getKindAsString() < AI.getKindAsString();
+ return false;
}
uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { -- cgit v1.1 From ab90084bca42b74a5b5edad9b416bd81e105dad0 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 15 Feb 2013 04:28:42 +0000 Subject: BBVectorize: Cap the number of candidate pairs in each instruction group For some basic blocks, it is possible to generate many candidate pairs for relatively few pairable instructions. When many (tens of thousands) of these pairs are generated for a single instruction group, the time taken to generate and rank the different vectorization plans can become quite large. As a result, we now cap the number of candidate pairs within each instruction group. This is done by closing out the group once the threshold is reached (set now at 3000 pairs). Although this will limit the overall compile-time impact, it may not be the best way to achieve this result. It might be better, for example, to prune excessive candidate pairs after the fact to prevent the generation of short, but highly-connected groups. We can experiment with this in the future. This change reduces the overall compile-time slowdown of the csa.ll test case in PR15222 to ~5x. If 5x is still considered too large, a lower limit can be used as the default. This represents a functionality change, but only for very large inputs (thus, there is no regression test).
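To make the capping strategy concrete, here is a standalone sketch of the group-closing logic. Everything in it is an invented stand-in (Inst, isPairable, the 200-instruction block) except the 3000-pair budget, which mirrors the new -bb-vectorize-max-pairs-per-group default.

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

struct Inst { int Id; };

// Placeholder compatibility test; the real pass checks types, opcodes,
// and memory behavior before pairing two instructions.
static bool isPairable(const Inst &A, const Inst &B) {
  return (A.Id + B.Id) % 2 == 0;
}

// Collect candidate pairs starting at Begin, closing out the group once
// MaxPairs is reached. Resume is set to where the next group should start.
static std::vector<std::pair<int, int> >
collectPairs(const std::vector<Inst> &Insts, std::size_t Begin,
             std::size_t MaxPairs, std::size_t &Resume) {
  std::vector<std::pair<int, int> > Pairs;
  std::size_t TotalPairs = 0;

  for (std::size_t I = Begin; I != Insts.size(); ++I) {
    for (std::size_t J = I + 1; J != Insts.size(); ++J)
      if (isPairable(Insts[I], Insts[J])) {
        Pairs.push_back(std::make_pair(Insts[I].Id, Insts[J].Id));
        ++TotalPairs;
      }
    // Close out this group once the budget is spent.
    if (TotalPairs >= MaxPairs) {
      Resume = I + 1;
      return Pairs;
    }
  }
  Resume = Insts.size();
  return Pairs;
}

int main() {
  std::vector<Inst> Block;
  for (int I = 0; I != 200; ++I)
    Block.push_back(Inst{I});

  std::size_t Next = 0;
  const std::size_t Budget = 3000; // mirrors the new default cap
  while (Next != Block.size()) {
    std::vector<std::pair<int, int> > Pairs =
        collectPairs(Block, Next, Budget, Next);
    std::printf("group closed with %zu pairs\n", Pairs.size());
  }
  return 0;
}

When the budget is exhausted, the current group is closed and the caller restarts the scan after the last instruction examined, mirroring how setting ShouldContinue causes the pass to re-invoke the pair collection.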
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 37638ca..4849a96 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -88,6 +88,10 @@ MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group")); static cl::opt +MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, + cl::desc("The maximum number of candidate instruction pairs per group")); + +static cl::opt MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" " a full cycle check")); @@ -1164,6 +1168,7 @@ namespace { DenseSet &FixedOrderPairs, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, bool NonPow2Len) { + size_t TotalPairs = 0; BasicBlock::iterator E = BB.end(); if (Start == E) return false; @@ -1210,6 +1215,7 @@ namespace { } CandidatePairs[I].push_back(J); + ++TotalPairs; if (TTI) CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), CostSavings)); @@ -1233,7 +1239,8 @@ namespace { // If we have already found too many pairs, break here and this function // will be called again starting after the last instruction selected // during this invocation. - if (PairableInsts.size() >= Config.MaxInsts) { + if (PairableInsts.size() >= Config.MaxInsts || + TotalPairs >= Config.MaxPairs) { ShouldContinue = true; break; } @@ -3165,6 +3172,7 @@ VectorizeConfig::VectorizeConfig() { MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; SplatBreaksChain = ::SplatBreaksChain; MaxInsts = ::MaxInsts; + MaxPairs = ::MaxPairs; MaxIter = ::MaxIter; Pow2LenOnly = ::Pow2LenOnly; NoMemOpBoost = ::NoMemOpBoost; -- cgit v1.1 From 94328f4ff3d450104f15c9dae437cea80417233d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 05:25:26 +0000 Subject: Simplify the 'operator<' for the attribute object. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175252 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 3de304e..629679c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -332,25 +332,23 @@ StringRef AttributeImpl::getValueAsString() const { bool AttributeImpl::operator<(const AttributeImpl &AI) const { // This sorts the attributes with Attribute::AttrKinds coming first (sorted // relative to their enum value) and then strings. 
- if (isEnumAttribute()) - if (AI.isAlignAttribute() || AI.isEnumAttribute()) - return getKindAsEnum() < AI.getKindAsEnum(); - - if (isAlignAttribute()) { - if (!AI.isStringAttribute() && getKindAsEnum() < AI.getKindAsEnum()) - return true; - if (AI.isAlignAttribute()) - return getValueAsInt() < AI.getValueAsInt(); + if (isEnumAttribute()) { + if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum(); + if (AI.isAlignAttribute()) return true; + if (AI.isStringAttribute()) return true; } - if (isStringAttribute()) { - if (!AI.isStringAttribute()) return false; - if (getKindAsString() < AI.getKindAsString()) return true; - if (getKindAsString() == AI.getKindAsString()) - return getValueAsString() < AI.getValueAsString(); + if (isAlignAttribute()) { + if (AI.isEnumAttribute()) return false; + if (AI.isAlignAttribute()) return getValueAsInt() < AI.getValueAsInt(); + if (AI.isStringAttribute()) return true; } - return false; + if (AI.isEnumAttribute()) return false; + if (AI.isAlignAttribute()) return false; + if (getKindAsString() == AI.getKindAsString()) + return getValueAsString() < AI.getValueAsString(); + return getKindAsString() < AI.getKindAsString(); } uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { -- cgit v1.1 From 148ac534fc5592ed7031efde9a577890f078068b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 15 Feb 2013 09:33:26 +0000 Subject: AArch64: refactor frame handling to use movz/movk for overlarge offsets. In the near future litpools will be in a different section, which means that any access to them is at least two instructions. This makes the case for a movz/movk pair (if total offset <= 32-bits) even more compelling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175257 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 41 +++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 94b3429..9a7504a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -623,18 +623,35 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB, else if (abs(NumBytes) & ~0xffffff) { // Generically, we have to materialize the offset into a temporary register // and subtract it. There are a couple of ways this could be done, for now - // we'll go for a literal-pool load. - MachineFunction &MF = *MBB.getParent(); - MachineConstantPool *MCP = MF.getConstantPool(); - const Constant *C - = ConstantInt::get(Type::getInt64Ty(MF.getFunction()->getContext()), - abs(NumBytes)); - unsigned CPI = MCP->getConstantPoolIndex(C, 8); - - // LDR xTMP, .LITPOOL - BuildMI(MBB, MBBI, dl, TII.get(AArch64::LDRx_lit), ScratchReg) - .addConstantPoolIndex(CPI) - .setMIFlag(MIFlags); + // we'll use a movz/movk or movn/movk sequence. 
+ uint64_t Bits = static_cast(abs(NumBytes)); + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) + .addImm(0xffff & Bits).addImm(0) + .setMIFlags(MIFlags); + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(1) + .setMIFlags(MIFlags); + } + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(2) + .setMIFlags(MIFlags); + } + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(3) + .setMIFlags(MIFlags); + } // ADD DST, SRC, xTMP (, lsl #0) unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; -- cgit v1.1 From 1e8839302b70d77de63844332bdee9ce7d06f2c9 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 15 Feb 2013 09:33:43 +0000 Subject: AArch64: remove ConstantIsland pass & put literals in separate section. This implements the review suggestion to simplify the AArch64 backend. If we later discover that we *really* need the extra complexity of the ConstantIslands pass for performance reasons it can be resurrected. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64.h | 2 - lib/Target/AArch64/AArch64AsmPrinter.cpp | 16 - lib/Target/AArch64/AArch64AsmPrinter.h | 5 - lib/Target/AArch64/AArch64ConstantIslandPass.cpp | 1423 ---------------------- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 146 ++- lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 44 - lib/Target/AArch64/AArch64InstrInfo.h | 4 - lib/Target/AArch64/AArch64InstrInfo.td | 30 +- lib/Target/AArch64/AArch64MachineFunctionInfo.h | 11 - lib/Target/AArch64/AArch64TargetMachine.cpp | 1 - lib/Target/AArch64/CMakeLists.txt | 1 - 12 files changed, 83 insertions(+), 1610 deletions(-) delete mode 100644 lib/Target/AArch64/AArch64ConstantIslandPass.cpp (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index 622814d..a97aae7 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -29,8 +29,6 @@ class MCInst; FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, CodeGenOpt::Level OptLevel); -FunctionPass *createAArch64ConstantIslandPass(); - FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 61839b6..47ebb82 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -17,7 +17,6 @@ #include "InstPrinter/AArch64InstPrinter.h" #include "llvm/DebugInfo.h" #include "llvm/ADT/SmallString.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" @@ -298,20 +297,6 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; switch (MI->getOpcode()) { - case AArch64::CONSTPOOL_ENTRY: { - unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); - unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); - - OutStreamer.EmitLabel(GetCPISymbol(LabelId)); - - const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; - if 
(MCPE.isMachineConstantPoolEntry()) - EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); - else - EmitGlobalConstant(MCPE.Val.ConstVal); - - return; - } case AArch64::DBG_VALUE: { if (isVerbose() && OutStreamer.hasRawTextSupport()) { SmallString<128> TmpStr; @@ -352,7 +337,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { } bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - MCP = MF.getConstantPool(); return AsmPrinter::runOnMachineFunction(MF); } diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h index b6f9ee6..af0c9fe 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.h +++ b/lib/Target/AArch64/AArch64AsmPrinter.h @@ -29,7 +29,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when printing asm code for different targets. const AArch64Subtarget *Subtarget; - const MachineConstantPool *MCP; // emitPseudoExpansionLowering - tblgen'erated. bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, @@ -74,10 +73,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { return "AArch64 Assembly Printer"; } - /// A no-op on AArch64 because we emit our constant pool entries inline with - /// the function. - virtual void EmitConstantPool() {} - virtual bool runOnMachineFunction(MachineFunction &MF); }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp deleted file mode 100644 index ab482bd..0000000 --- a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp +++ /dev/null @@ -1,1423 +0,0 @@ -//===-- AArch64ConstantIslandPass.cpp - AArch64 constant islands ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that splits the constant pool up into 'islands' -// which are scattered through-out the function. This is required due to the -// limited pc-relative displacements that AArch64 has. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "aarch64-cp-islands" -#include "AArch64.h" -#include "AArch64InstrInfo.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64Subtarget.h" -#include "AArch64MachineFunctionInfo.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include -using namespace llvm; - -STATISTIC(NumCPEs, "Number of constpool entries"); -STATISTIC(NumSplit, "Number of uncond branches inserted"); -STATISTIC(NumCBrFixed, "Number of cond branches fixed"); - -// FIXME: This option should be removed once it has received sufficient testing. 
-static cl::opt -AlignConstantIslands("aarch64-align-constant-islands", cl::Hidden, - cl::init(true), - cl::desc("Align constant islands in code")); - -/// Return the worst case padding that could result from unknown offset bits. -/// This does not include alignment padding caused by known offset bits. -/// -/// @param LogAlign log2(alignment) -/// @param KnownBits Number of known low offset bits. -static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { - if (KnownBits < LogAlign) - return (1u << LogAlign) - (1u << KnownBits); - return 0; -} - -namespace { - /// Due to limited PC-relative displacements, AArch64 requires constant pool - /// entries to be scattered among the instructions inside a function. To do - /// this, it completely ignores the normal LLVM constant pool; instead, it - /// places constants wherever it feels like with special instructions. - /// - /// The terminology used in this pass includes: - /// Islands - Clumps of constants placed in the function. - /// Water - Potential places where an island could be formed. - /// CPE - A constant pool entry that has been placed somewhere, which - /// tracks a list of users. - class AArch64ConstantIslands : public MachineFunctionPass { - /// Information about the offset and size of a single basic block. - struct BasicBlockInfo { - /// Distance from the beginning of the function to the beginning of this - /// basic block. - /// - /// Offsets are computed assuming worst case padding before an aligned - /// block. This means that subtracting basic block offsets always gives a - /// conservative estimate of the real distance which may be smaller. - /// - /// Because worst case padding is used, the computed offset of an aligned - /// block may not actually be aligned. - unsigned Offset; - - /// Size of the basic block in bytes. If the block contains inline - /// assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - /// The number of low bits in Offset that are known to be exact. The - /// remaining bits of Offset are an upper bound. - uint8_t KnownBits; - - /// When non-zero, the block contains instructions (inline asm) of unknown - /// size. The real size may be smaller than Size bytes by a multiple of 1 - /// << Unalign. - uint8_t Unalign; - - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} - - /// Compute the number of known offset bits internally to this block. - /// This number should be used to predict worst case padding when - /// splitting the block. - unsigned internalKnownBits() const { - unsigned Bits = Unalign ? Unalign : KnownBits; - // If the block size isn't a multiple of the known bits, assume the - // worst case padding. - if (Size & ((1u << Bits) - 1)) - Bits = CountTrailingZeros_32(Size); - return Bits; - } - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. - unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - if (!LogAlign) - return PO; - // Add alignment padding from the terminator. - return PO + UnknownPadding(LogAlign, internalKnownBits()); - } - - /// Compute the number of known low bits of postOffset. If this block - /// contains inline asm, the number of known bits drops to the - /// instruction alignment. 
An aligned terminator may increase the number - /// of know bits. - /// If LogAlign is given, also consider the alignment of the next block. - unsigned postKnownBits(unsigned LogAlign = 0) const { - return std::max(LogAlign, internalKnownBits()); - } - }; - - std::vector BBInfo; - - /// A sorted list of basic blocks where islands could be placed (i.e. blocks - /// that don't fall through to the following block, due to a return, - /// unreachable, or unconditional branch). - std::vector WaterList; - - /// The subset of WaterList that was created since the previous iteration by - /// inserting unconditional branches. - SmallSet NewWaterList; - - typedef std::vector::iterator water_iterator; - - /// One user of a constant pool, keeping the machine instruction pointer, - /// the constant pool being referenced, and the number of bits used by the - /// instruction for displacement. The HighWaterMark records the highest - /// basic block where a new CPEntry can be placed. To ensure this pass - /// terminates, the CP entries are initially placed at the end of the - /// function and then move monotonically to lower addresses. The exception - /// to this rule is when the current CP entry for a particular CPUser is out - /// of range, but there is another CP entry for the same constant value in - /// range. We want to use the existing in-range CP entry, but if it later - /// moves out of range, the search for new water should resume where it left - /// off. The HighWaterMark is used to record that point. - struct CPUser { - MachineInstr *MI; - MachineInstr *CPEMI; - MachineBasicBlock *HighWaterMark; - private: - unsigned OffsetBits; - public: - CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned offsetbits) - : MI(mi), CPEMI(cpemi), OffsetBits(offsetbits) { - HighWaterMark = CPEMI->getParent(); - } - /// Returns the number of bits used to specify the offset. - unsigned getOffsetBits() const { - return OffsetBits; - } - - /// Returns the maximum positive displacement possible from this CPUser - /// (essentially INT_MAX * 4). - unsigned getMaxPosDisp() const { - return (1 << (OffsetBits - 1)) - 1; - } - }; - - /// Keep track of all of the machine instructions that use various constant - /// pools and their max displacement. - std::vector CPUsers; - - /// One per constant pool entry, keeping the machine instruction pointer, - /// the constpool index, and the number of CPUser's which reference this - /// entry. - struct CPEntry { - MachineInstr *CPEMI; - unsigned CPI; - unsigned RefCount; - CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) - : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} - }; - - /// Keep track of all of the constant pool entry machine instructions. For - /// each original constpool index (i.e. those that existed upon entry to - /// this pass), it keeps a vector of entries. Original elements are cloned - /// as we go along; the clones are put in the vector of the original - /// element, but have distinct CPIs. - std::vector > CPEntries; - - /// One per immediate branch, keeping the machine instruction pointer, - /// conditional or unconditional, the max displacement, and (if IsCond is - /// true) the corresponding inverted branch opcode. - struct ImmBranch { - MachineInstr *MI; - unsigned OffsetBits : 31; - bool IsCond : 1; - ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) - : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} - }; - - /// Keep track of all the immediate branch instructions. 
- /// - std::vector ImmBranches; - - MachineFunction *MF; - MachineConstantPool *MCP; - const AArch64InstrInfo *TII; - const AArch64Subtarget *STI; - AArch64MachineFunctionInfo *AFI; - public: - static char ID; - AArch64ConstantIslands() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "AArch64 constant island placement pass"; - } - - private: - void doInitialPlacement(std::vector &CPEMIs); - CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - unsigned getCPELogAlign(const MachineInstr *CPEMI); - void scanFunctionJumpTables(); - void initializeFunctionInfo(const std::vector &CPEMIs); - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); - void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void adjustBBOffsetsAfter(MachineBasicBlock *BB); - bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); - int findInRangeCPEntry(CPUser& U, unsigned UserOffset); - bool findAvailableWater(CPUser&U, unsigned UserOffset, - water_iterator &WaterIter); - void createNewWater(unsigned CPUserIndex, unsigned UserOffset, - MachineBasicBlock *&NewMBB); - bool handleConstantPoolUser(unsigned CPUserIndex); - void removeDeadCPEMI(MachineInstr *CPEMI); - bool removeUnusedCPEntries(); - bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, unsigned OffsetBits, - bool DoDump = false); - bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, - CPUser &U, unsigned &Growth); - bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, - unsigned OffsetBits); - bool fixupImmediateBr(ImmBranch &Br); - bool fixupConditionalBr(ImmBranch &Br); - - void computeBlockSize(MachineBasicBlock *MBB); - unsigned getOffsetOf(MachineInstr *MI) const; - unsigned getUserOffset(CPUser&) const; - void dumpBBs(); - void verify(); - - bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, - unsigned BitsAvailable); - bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, - const CPUser &U) { - return isOffsetInRange(UserOffset, TrialOffset, U.getOffsetBits()); - } - }; - char AArch64ConstantIslands::ID = 0; -} - -/// check BBOffsets, BBSizes, alignment of islands -void AArch64ConstantIslands::verify() { -#ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned MBBId = MBB->getNumber(); - assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); - } - DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); - for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { - CPUser &U = CPUsers[i]; - unsigned UserOffset = getUserOffset(U); - // Verify offset using the real max displacement without the safety - // adjustment. 
- if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getOffsetBits(), - /* DoDump = */ true)) { - DEBUG(dbgs() << "OK\n"); - continue; - } - DEBUG(dbgs() << "Out of range.\n"); - dumpBBs(); - DEBUG(MF->dump()); - llvm_unreachable("Constant pool entry out of range!"); - } -#endif -} - -/// print block size and offset information - debugging -void AArch64ConstantIslands::dumpBBs() { - DEBUG({ - for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { - const BasicBlockInfo &BBI = BBInfo[J]; - dbgs() << format("%08x BB#%u\t", BBI.Offset, J) - << " kb=" << unsigned(BBI.KnownBits) - << " ua=" << unsigned(BBI.Unalign) - << format(" size=%#x\n", BBInfo[J].Size); - } - }); -} - -/// Returns an instance of the constpool island pass. -FunctionPass *llvm::createAArch64ConstantIslandPass() { - return new AArch64ConstantIslands(); -} - -bool AArch64ConstantIslands::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - MCP = mf.getConstantPool(); - - DEBUG(dbgs() << "***** AArch64ConstantIslands: " - << MCP->getConstants().size() << " CP entries, aligned to " - << MCP->getConstantPoolAlignment() << " bytes *****\n"); - - TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); - AFI = MF->getInfo(); - STI = &MF->getTarget().getSubtarget(); - - // This pass invalidates liveness information when it splits basic blocks. - MF->getRegInfo().invalidateLiveness(); - - // Renumber all of the machine basic blocks in the function, guaranteeing that - // the numbers agree with the position of the block in the function. - MF->RenumberBlocks(); - - // Perform the initial placement of the constant pool entries. To start with, - // we put them all at the end of the function. - std::vector CPEMIs; - if (!MCP->isEmpty()) - doInitialPlacement(CPEMIs); - - /// The next UID to take is the first unused one. - AFI->initPICLabelUId(CPEMIs.size()); - - // Do the initial scan of the function, building up information about the - // sizes of each block, the location of all the water, and finding all of the - // constant pool users. - initializeFunctionInfo(CPEMIs); - CPEMIs.clear(); - DEBUG(dumpBBs()); - - - /// Remove dead constant pool entries. - bool MadeChange = removeUnusedCPEntries(); - - // Iteratively place constant pool entries and fix up branches until there - // is no change. - unsigned NoCPIters = 0, NoBRIters = 0; - while (true) { - DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); - bool CPChange = false; - for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= handleConstantPoolUser(i); - if (CPChange && ++NoCPIters > 30) - report_fatal_error("Constant Island pass failed to converge!"); - DEBUG(dumpBBs()); - - // Clear NewWaterList now. If we split a block for branches, it should - // appear as "new water" for the next iteration of constant pool placement. - NewWaterList.clear(); - - DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); - bool BRChange = false; - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= fixupImmediateBr(ImmBranches[i]); - if (BRChange && ++NoBRIters > 30) - report_fatal_error("Branch Fix Up pass failed to converge!"); - DEBUG(dumpBBs()); - - if (!CPChange && !BRChange) - break; - MadeChange = true; - } - - // After a while, this might be made debug-only, but it is not expensive. - verify(); - - DEBUG(dbgs() << '\n'; dumpBBs()); - - BBInfo.clear(); - WaterList.clear(); - CPUsers.clear(); - CPEntries.clear(); - ImmBranches.clear(); - - return MadeChange; -} - -/// Perform the initial placement of the constant pool entries. 
To start with, -/// we put them all at the end of the function. -void -AArch64ConstantIslands::doInitialPlacement(std::vector &CPEMIs) { - // Create the basic block to hold the CPE's. - MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); - MF->push_back(BB); - - // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). - unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); - - // Mark the basic block as required by the const-pool. - // If AlignConstantIslands isn't set, use 4-byte alignment for everything. - BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); - - // The function needs to be as aligned as the basic blocks. The linker may - // move functions around based on their alignment. - MF->ensureAlignment(BB->getAlignment()); - - // Order the entries in BB by descending alignment. That ensures correct - // alignment of all entries as long as BB is sufficiently aligned. Keep - // track of the insertion point for each alignment. We are going to bucket - // sort the entries as they are created. - SmallVector InsPoint(MaxAlign + 1, BB->end()); - - // Add all of the constants from the constant pool to the end block, use an - // identity mapping of CPI's to CPE's. - const std::vector &CPs = MCP->getConstants(); - - const DataLayout &TD = *MF->getTarget().getDataLayout(); - for (unsigned i = 0, e = CPs.size(); i != e; ++i) { - unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); - assert(Size >= 4 && "Too small constant pool entry"); - unsigned Align = CPs[i].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid alignment"); - // Verify that all constant pool entries are a multiple of their alignment. - // If not, we would have to pad them out so that instructions stay aligned. - assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!"); - - // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. - unsigned LogAlign = Log2_32(Align); - MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; - MachineInstr *CPEMI = - BuildMI(*BB, InsAt, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY)) - .addImm(i).addConstantPoolIndex(i).addImm(Size); - CPEMIs.push_back(CPEMI); - - // Ensure that future entries with higher alignment get inserted before - // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) - if (InsPoint[a] == InsAt) - InsPoint[a] = CPEMI; - - // Add a new CPEntry, but no corresponding CPUser yet. - std::vector CPEs; - CPEs.push_back(CPEntry(CPEMI, i)); - CPEntries.push_back(CPEs); - ++NumCPEs; - DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " - << Size << ", align = " << Align <<'\n'); - } - DEBUG(BB->dump()); -} - -/// Return true if the specified basic block can fallthrough into the block -/// immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { - // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; - // Can't fall off end of function. - if (llvm::next(MBBI) == MBB->getParent()->end()) - return false; - - MachineBasicBlock *NextBB = llvm::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I == NextBB) - return true; - - return false; -} - -/// Given the constpool index and CONSTPOOL_ENTRY MI, look up the corresponding -/// CPEntry. 
-AArch64ConstantIslands::CPEntry -*AArch64ConstantIslands::findConstPoolEntry(unsigned CPI, - const MachineInstr *CPEMI) { - std::vector &CPEs = CPEntries[CPI]; - // Number of entries per constpool index should be small, just do a - // linear search. - for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { - if (CPEs[i].CPEMI == CPEMI) - return &CPEs[i]; - } - return NULL; -} - -/// Returns the required alignment of the constant pool entry represented by -/// CPEMI. Alignment is measured in log2(bytes) units. -unsigned AArch64ConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { - assert(CPEMI && CPEMI->getOpcode() == AArch64::CONSTPOOL_ENTRY); - - // Everything is 4-byte aligned unless AlignConstantIslands is set. - if (!AlignConstantIslands) - return 2; - - unsigned CPI = CPEMI->getOperand(1).getIndex(); - assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); - unsigned Align = MCP->getConstants()[CPI].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); - return Log2_32(Align); -} - -/// Do the initial scan of the function, building up information about the sizes -/// of each block, the location of all the water, and finding all of the -/// constant pool users. -void AArch64ConstantIslands:: -initializeFunctionInfo(const std::vector &CPEMIs) { - BBInfo.clear(); - BBInfo.resize(MF->getNumBlockIDs()); - - // First thing, compute the size of all basic blocks, and see if the function - // has any inline assembly in it. If so, we have to be conservative about - // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); - - // The known bits of the entry block offset are determined by the function - // alignment. - BBInfo.front().KnownBits = MF->getAlignment(); - - // Compute block offsets and known bits. - adjustBBOffsetsAfter(MF->begin()); - - // Now go back through the instructions and build up our data structures. - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - - // If this block doesn't fall through into the next MBB, then this is - // 'water' that a constant pool island could be placed. - if (!BBHasFallthrough(&MBB)) - WaterList.push_back(&MBB); - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - int Opc = I->getOpcode(); - if (I->isBranch()) { - bool IsCond = false; - - // The offsets encoded in instructions here scale by the instruction - // size (4 bytes), effectively increasing their range by 2 bits. - unsigned Bits = 0; - switch (Opc) { - default: - continue; // Ignore other JT branches - case AArch64::TBZxii: - case AArch64::TBZwii: - case AArch64::TBNZxii: - case AArch64::TBNZwii: - IsCond = true; - Bits = 14 + 2; - break; - case AArch64::Bcc: - case AArch64::CBZx: - case AArch64::CBZw: - case AArch64::CBNZx: - case AArch64::CBNZw: - IsCond = true; - Bits = 19 + 2; - break; - case AArch64::Bimm: - Bits = 26 + 2; - break; - } - - // Record this immediate branch. - ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); - } - - if (Opc == AArch64::CONSTPOOL_ENTRY) - continue; - - // Scan the instructions for constant pool operands. - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (I->getOperand(op).isCPI()) { - // We found one. 
The addressing mode tells us the max displacement - // from the PC that this instruction permits. - - // The offsets encoded in instructions here scale by the instruction - // size (4 bytes), effectively increasing their range by 2 bits. - unsigned Bits = 0; - - switch (Opc) { - default: - llvm_unreachable("Unknown addressing mode for CP reference!"); - - case AArch64::LDRw_lit: - case AArch64::LDRx_lit: - case AArch64::LDRs_lit: - case AArch64::LDRd_lit: - case AArch64::LDRq_lit: - case AArch64::LDRSWx_lit: - case AArch64::PRFM_lit: - Bits = 19 + 2; - } - - // Remember that this is a user of a CP entry. - unsigned CPI = I->getOperand(op).getIndex(); - MachineInstr *CPEMI = CPEMIs[CPI]; - CPUsers.push_back(CPUser(I, CPEMI, Bits)); - - // Increment corresponding CPEntry reference count. - CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); - assert(CPE && "Cannot find a corresponding CPEntry!"); - CPE->RefCount++; - - // Instructions can only use one CP entry, don't bother scanning the - // rest of the operands. - break; - } - } - } -} - -/// Compute the size and some alignment information for MBB. This function -/// updates BBInfo directly. -void AArch64ConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { - BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; - BBI.Size = 0; - BBI.Unalign = 0; - - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - BBI.Size += TII->getInstSizeInBytes(*I); - // For inline asm, GetInstSizeInBytes returns a conservative estimate. - // The actual size may be smaller, but still a multiple of the instr size. - if (I->isInlineAsm()) - BBI.Unalign = 2; - } -} - -/// Return the current offset of the specified machine instruction from the -/// start of the function. This offset changes as stuff is moved around inside -/// the function. -unsigned AArch64ConstantIslands::getOffsetOf(MachineInstr *MI) const { - MachineBasicBlock *MBB = MI->getParent(); - - // The offset is composed of two things: the sum of the sizes of all MBB's - // before this instruction's block, and the offset from the start of the block - // it is in. - unsigned Offset = BBInfo[MBB->getNumber()].Offset; - - // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { - assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->getInstSizeInBytes(*I); - } - return Offset; -} - -/// Little predicate function to sort the WaterList by MBB ID. -static bool CompareMBBNumbers(const MachineBasicBlock *LHS, - const MachineBasicBlock *RHS) { - return LHS->getNumber() < RHS->getNumber(); -} - -/// When a block is newly inserted into the machine function, it upsets all of -/// the block numbers. Renumber the blocks and update the arrays that parallel -/// this numbering. -void AArch64ConstantIslands:: -updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { - // Renumber the MBB's to keep them consecutive. - NewBB->getParent()->RenumberBlocks(NewBB); - - // Insert an entry into BBInfo to align it properly with the (newly - // renumbered) block numbers. - BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Next, update WaterList. Specifically, we need to add NewMBB as having - // available water after it. - water_iterator IP = - std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, - CompareMBBNumbers); - WaterList.insert(IP, NewBB); -} - - -/// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. 
Update data structures and renumber blocks to -/// account for this change and returns the newly created block. -MachineBasicBlock * -AArch64ConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { - MachineBasicBlock *OrigBB = MI->getParent(); - - // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; ++MBBI; - MF->insert(MBBI, NewBB); - - // Splice the instructions starting with MI over to NewBB. - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); - - // Add an unconditional branch from OrigBB to NewBB. - // Note the new unconditional branch is not being recorded. - // There doesn't seem to be meaningful DebugInfo available; this doesn't - // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); - ++NumSplit; - - // Update the CFG. All succs of OrigBB are now succs of NewBB. - NewBB->transferSuccessors(OrigBB); - - // OrigBB branches to NewBB. - OrigBB->addSuccessor(NewBB); - - // Update internal data structures to account for the newly inserted MBB. - // This is almost the same as updateForInsertedWaterBlock, except that - // the Water goes after OrigBB, not NewBB. - MF->RenumberBlocks(NewBB); - - // Insert an entry into BBInfo to align it properly with the (newly - // renumbered) block numbers. - BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Next, update WaterList. Specifically, we need to add OrigMBB as having - // available water after it (but not if it's already there, which happens - // when splitting before a conditional branch that is followed by an - // unconditional branch - in that case we want to insert NewBB). - water_iterator IP = - std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, - CompareMBBNumbers); - MachineBasicBlock* WaterBB = *IP; - if (WaterBB == OrigBB) - WaterList.insert(llvm::next(IP), NewBB); - else - WaterList.insert(IP, OrigBB); - NewWaterList.insert(OrigBB); - - // Figure out how large the OrigBB is. As the first half of the original - // block, it cannot contain a tablejump. The size includes - // the new jump we added. (It should be possible to do this without - // recounting everything, but it's very confusing, and this is rarely - // executed.) - computeBlockSize(OrigBB); - - // Figure out how large the NewMBB is. As the second half of the original - // block, it may contain a tablejump. - computeBlockSize(NewBB); - - // All BBOffsets following these blocks must be modified. - adjustBBOffsetsAfter(OrigBB); - - return NewBB; -} - -/// Compute the offset of U.MI as seen by the hardware displacement computation. -unsigned AArch64ConstantIslands::getUserOffset(CPUser &U) const { - return getOffsetOf(U.MI); -} - -/// Checks whether UserOffset (the location of a constant pool reference) is -/// within OffsetBits of TrialOffset (a proposed location of a constant pool -/// entry). -bool AArch64ConstantIslands::isOffsetInRange(unsigned UserOffset, - unsigned TrialOffset, - unsigned OffsetBits) { - return isIntN(OffsetBits, static_cast(TrialOffset) - UserOffset); -} - -/// Returns true if a CPE placed after the specified Water (a basic block) will -/// be in range for the specific MI. -/// -/// Compute how much the function will grow by inserting a CPE after Water. 
-bool AArch64ConstantIslands::isWaterInRange(unsigned UserOffset, - MachineBasicBlock* Water, CPUser &U, - unsigned &Growth) { - unsigned CPELogAlign = getCPELogAlign(U.CPEMI); - unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); - unsigned NextBlockOffset, NextBlockAlignment; - MachineFunction::const_iterator NextBlock = Water; - if (++NextBlock == MF->end()) { - NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); - NextBlockAlignment = 0; - } else { - NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; - NextBlockAlignment = NextBlock->getAlignment(); - } - unsigned Size = U.CPEMI->getOperand(2).getImm(); - unsigned CPEEnd = CPEOffset + Size; - - // The CPE may be able to hide in the alignment padding before the next - // block. It may also cause more padding to be required if it is more aligned - // that the next block. - if (CPEEnd > NextBlockOffset) { - Growth = CPEEnd - NextBlockOffset; - // Compute the padding that would go at the end of the CPE to align the next - // block. - Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); - - // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. Also account for unknown alignment padding - // in blocks between CPE and the user. - if (CPEOffset < UserOffset) - UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); - } else - // CPE fits in existing padding. - Growth = 0; - - return isOffsetInRange(UserOffset, CPEOffset, U); -} - -/// Returns true if the distance between specific MI and specific ConstPool -/// entry instruction can fit in MI's displacement field. -bool AArch64ConstantIslands::isCPEntryInRange(MachineInstr *MI, - unsigned UserOffset, - MachineInstr *CPEMI, - unsigned OffsetBits, - bool DoDump) { - unsigned CPEOffset = getOffsetOf(CPEMI); - - if (DoDump) { - DEBUG({ - unsigned Block = MI->getParent()->getNumber(); - const BasicBlockInfo &BBI = BBInfo[Block]; - dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() - << " bits available=" << OffsetBits - << format(" insn address=%#x", UserOffset) - << " in BB#" << Block << ": " - << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI - << format("CPE address=%#x offset=%+d: ", CPEOffset, - int(CPEOffset-UserOffset)); - }); - } - - return isOffsetInRange(UserOffset, CPEOffset, OffsetBits); -} - -#ifndef NDEBUG -/// Return true of the specified basic block's only predecessor unconditionally -/// branches to its only successor. -static bool BBIsJumpedOver(MachineBasicBlock *MBB) { - if (MBB->pred_size() != 1 || MBB->succ_size() != 1) - return false; - - MachineBasicBlock *Succ = *MBB->succ_begin(); - MachineBasicBlock *Pred = *MBB->pred_begin(); - MachineInstr *PredMI = &Pred->back(); - if (PredMI->getOpcode() == AArch64::Bimm) - return PredMI->getOperand(0).getMBB() == Succ; - return false; -} -#endif // NDEBUG - -void AArch64ConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { - unsigned BBNum = BB->getNumber(); - for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { - // Get the offset and known bits at the end of the layout predecessor. - // Include the alignment of the current block. - unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); - - // This is where block i begins. Stop if the offset is already correct, - // and we have updated 2 blocks. 
This is the maximum number of blocks - // changed before calling this function. - if (i > BBNum + 2 && - BBInfo[i].Offset == Offset && - BBInfo[i].KnownBits == KnownBits) - break; - - BBInfo[i].Offset = Offset; - BBInfo[i].KnownBits = KnownBits; - } -} - -/// Find the constant pool entry with index CPI and instruction CPEMI, and -/// decrement its refcount. If the refcount becomes 0 remove the entry and -/// instruction. Returns true if we removed the entry, false if we didn't. -bool AArch64ConstantIslands::decrementCPEReferenceCount(unsigned CPI, - MachineInstr *CPEMI) { - // Find the old entry. Eliminate it if it is no longer used. - CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); - assert(CPE && "Unexpected!"); - if (--CPE->RefCount == 0) { - removeDeadCPEMI(CPEMI); - CPE->CPEMI = NULL; - --NumCPEs; - return true; - } - return false; -} - -/// See if the currently referenced CPE is in range; if not, see if an in-range -/// clone of the CPE is in range, and if so, change the data structures so the -/// user references the clone. Returns: -/// 0 = no existing entry found -/// 1 = entry found, and there were no code insertions or deletions -/// 2 = entry found, and there were code insertions or deletions -int AArch64ConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) -{ - MachineInstr *UserMI = U.MI; - MachineInstr *CPEMI = U.CPEMI; - - // Check to see if the CPE is already in-range. - if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getOffsetBits(), true)) { - DEBUG(dbgs() << "In range\n"); - return 1; - } - - // No. Look for previously created clones of the CPE that are in range. - unsigned CPI = CPEMI->getOperand(1).getIndex(); - std::vector &CPEs = CPEntries[CPI]; - for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { - // We already tried this one - if (CPEs[i].CPEMI == CPEMI) - continue; - // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == NULL) - continue; - if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, - U.getOffsetBits())) { - DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" - << CPEs[i].CPI << "\n"); - // Point the CPUser node to the replacement - U.CPEMI = CPEs[i].CPEMI; - // Change the CPI in the instruction operand to refer to the clone. - for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) - if (UserMI->getOperand(j).isCPI()) { - UserMI->getOperand(j).setIndex(CPEs[i].CPI); - break; - } - // Adjust the refcount of the clone... - CPEs[i].RefCount++; - // ...and the original. If we didn't remove the old entry, none of the - // addresses changed, so we don't need another pass. - return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; - } - } - return 0; -} - -/// Look for an existing entry in the WaterList in which we can place the CPE -/// referenced from U so it's within range of U's MI. Returns true if found, -/// false if not. If it returns true, WaterIter is set to the WaterList -/// entry. To ensure that this pass terminates, the CPE location for a -/// particular CPUser is only allowed to move to a lower address, so search -/// backward from the end of the list and prefer the first water that is in -/// range. 
-bool AArch64ConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, - water_iterator &WaterIter) { - if (WaterList.empty()) - return false; - - unsigned BestGrowth = ~0u; - for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; - --IP) { - MachineBasicBlock* WaterBB = *IP; - // Check if water is in range and is either at a lower address than the - // current "high water mark" or a new water block that was created since - // the previous iteration by inserting an unconditional branch. In the - // latter case, we want to allow resetting the high water mark back to - // this new water since we haven't seen it before. Inserting branches - // should be relatively uncommon and when it does happen, we want to be - // sure to take advantage of it for all the CPEs near that block, so that - // we don't insert more branches than necessary. - unsigned Growth; - if (isWaterInRange(UserOffset, WaterBB, U, Growth) && - (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB)) && Growth < BestGrowth) { - // This is the least amount of required padding seen so far. - BestGrowth = Growth; - WaterIter = IP; - DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() - << " Growth=" << Growth << '\n'); - - // Keep looking unless it is perfect. - if (BestGrowth == 0) - return true; - } - if (IP == B) - break; - } - return BestGrowth != ~0u; -} - -/// No existing WaterList entry will work for CPUsers[CPUserIndex], so create a -/// place to put the CPE. The end of the block is used if in range, and the -/// conditional branch munged so control flow is correct. Otherwise the block -/// is split to create a hole with an unconditional branch around it. In either -/// case NewMBB is set to a block following which the new island can be inserted -/// (the WaterList is not adjusted). -void AArch64ConstantIslands::createNewWater(unsigned CPUserIndex, - unsigned UserOffset, - MachineBasicBlock *&NewMBB) { - CPUser &U = CPUsers[CPUserIndex]; - MachineInstr *UserMI = U.MI; - MachineInstr *CPEMI = U.CPEMI; - unsigned CPELogAlign = getCPELogAlign(CPEMI); - MachineBasicBlock *UserMBB = UserMI->getParent(); - const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; - - // If the block does not end in an unconditional branch already, and if the - // end of the block is within range, make new water there. - if (BBHasFallthrough(UserMBB)) { - // Size of branch to insert. - unsigned InstrSize = 4; - // Compute the offset where the CPE will begin. - unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + InstrSize; - - if (isOffsetInRange(UserOffset, CPEOffset, U)) { - DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() - << format(", expected CPE offset %#x\n", CPEOffset)); - NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); - // Add an unconditional branch from UserMBB to fallthrough block. Record - // it for branch lengthening; this new branch will not get out of range, - // but if the preceding conditional branch is out of range, the targets - // will be exchanged, and the altered branch may be out of range, so the - // machinery has to know about it. - BuildMI(UserMBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewMBB); - - // 26 bits written down, specifying a multiple of 4. - unsigned OffsetBits = 26 + 2; - ImmBranches.push_back(ImmBranch(&UserMBB->back(), OffsetBits, false)); - BBInfo[UserMBB->getNumber()].Size += InstrSize; - adjustBBOffsetsAfter(UserMBB); - return; - } - } - - // What a big block. 
Find a place within the block to split it. We make a
-  // first guess, then walk through the instructions between the one currently
-  // being looked at and the possible insertion point, and make sure any other
-  // instructions that reference CPEs will be able to use the same island area;
-  // if not, we back up the insertion point.
-
-  // Try to split the block so it's fully aligned. Compute the latest split
-  // point where we can add a 4-byte branch instruction, and then align to
-  // LogAlign which is the largest possible alignment in the function.
-  unsigned LogAlign = MF->getAlignment();
-  assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
-  unsigned KnownBits = UserBBI.internalKnownBits();
-  unsigned UPad = UnknownPadding(LogAlign, KnownBits);
-  unsigned BaseInsertOffset = UserOffset + U.getMaxPosDisp() - UPad;
-  DEBUG(dbgs() << format("Split in middle of big block before %#x",
-                         BaseInsertOffset));
-
-  // The 4 in the following is for the unconditional branch we'll be inserting.
-  // Alignment of the island is handled inside isOffsetInRange.
-  BaseInsertOffset -= 4;
-
-  DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
-               << " la=" << LogAlign
-               << " kb=" << KnownBits
-               << " up=" << UPad << '\n');
-
-  // This could point off the end of the block if we've already got constant
-  // pool entries following this block; only the last one is in the water list.
-  // Back past any possible branches (allow for a conditional and a maximally
-  // long unconditional).
-  if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
-    BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
-    DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
-  }
-  unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
-    CPEMI->getOperand(2).getImm();
-  MachineBasicBlock::iterator MI = UserMI;
-  ++MI;
-  unsigned CPUIndex = CPUserIndex+1;
-  unsigned NumCPUsers = CPUsers.size();
-  for (unsigned Offset = UserOffset+TII->getInstSizeInBytes(*UserMI);
-       Offset < BaseInsertOffset;
-       Offset += TII->getInstSizeInBytes(*MI),
-       MI = llvm::next(MI)) {
-    assert(MI != UserMBB->end() && "Fell off end of block");
-    if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
-      CPUser &U = CPUsers[CPUIndex];
-      if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
-        // Shift insertion point by one unit of alignment so it is within reach.
-        BaseInsertOffset -= 1u << LogAlign;
-        EndInsertOffset -= 1u << LogAlign;
-      }
-      // This is overly conservative, as we don't account for CPEMIs being
-      // reused within the block, but it doesn't matter much. Also assume CPEs
-      // are added in order with alignment padding. We may eventually be able
-      // to pack the aligned CPEs better.
-      EndInsertOffset += U.CPEMI->getOperand(2).getImm();
-      CPUIndex++;
-    }
-  }
-
-  --MI;
-  NewMBB = splitBlockBeforeInstr(MI);
-}
-
-/// Analyze the specified user, checking to see if it is out-of-range. If so,
-/// pick up the constant pool value and move it some place in-range. Return
-/// true if we changed any addresses, false otherwise.
-bool AArch64ConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
-  CPUser &U = CPUsers[CPUserIndex];
-  MachineInstr *UserMI = U.MI;
-  MachineInstr *CPEMI = U.CPEMI;
-  unsigned CPI = CPEMI->getOperand(1).getIndex();
-  unsigned Size = CPEMI->getOperand(2).getImm();
-  // Compute this only once, it's expensive.
-  unsigned UserOffset = getUserOffset(U);
-
-  // See if the current entry is within range, or there is a clone of it
-  // in range.
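For reference, the worst-case padding arithmetic used by the split-point computation above can be reproduced in isolation. The following is a minimal standalone sketch (not LLVM code) of the same formula the pass uses for unknown alignment padding:

#include <cassert>

// Worst-case padding before a block aligned to (1 << LogAlign) bytes when
// only the low KnownBits bits of the current offset are exact: up to
// (1 << LogAlign) - (1 << KnownBits) bytes of padding may be required.
static unsigned worstCasePadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

int main() {
  // An offset known only to 4-byte alignment (KnownBits = 2) ahead of a
  // 16-byte aligned island (LogAlign = 4) may need up to 12 bytes of padding.
  assert(worstCasePadding(4, 2) == 12);
  // Already known to enough low bits: no padding can appear.
  assert(worstCasePadding(2, 4) == 0);
  return 0;
}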
- int result = findInRangeCPEntry(U, UserOffset); - if (result==1) return false; - else if (result==2) return true; - - // No existing clone of this CPE is within range. - // We will be generating a new clone. Get a UID for it. - unsigned ID = AFI->createPICLabelUId(); - - // Look for water where we can place this CPE. - MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); - MachineBasicBlock *NewMBB; - water_iterator IP; - if (findAvailableWater(U, UserOffset, IP)) { - DEBUG(dbgs() << "Found water in range\n"); - MachineBasicBlock *WaterBB = *IP; - - // If the original WaterList entry was "new water" on this iteration, - // propagate that to the new island. This is just keeping NewWaterList - // updated to match the WaterList, which will be updated below. - if (NewWaterList.count(WaterBB)) { - NewWaterList.erase(WaterBB); - NewWaterList.insert(NewIsland); - } - // The new CPE goes before the following block (NewMBB). - NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); - - } else { - // No water found. - DEBUG(dbgs() << "No water found\n"); - createNewWater(CPUserIndex, UserOffset, NewMBB); - - // splitBlockBeforeInstr adds to WaterList, which is important when it is - // called while handling branches so that the water will be seen on the - // next iteration for constant pools, but in this context, we don't want - // it. Check for this so it will be removed from the WaterList. - // Also remove any entry from NewWaterList. - MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); - IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); - if (IP != WaterList.end()) - NewWaterList.erase(WaterBB); - - // We are adding new water. Update NewWaterList. - NewWaterList.insert(NewIsland); - } - - // Remove the original WaterList entry; we want subsequent insertions in - // this vicinity to go after the one we're about to insert. This - // considerably reduces the number of times we have to move the same CPE - // more than once and is also important to ensure the algorithm terminates. - if (IP != WaterList.end()) - WaterList.erase(IP); - - // Okay, we know we can put an island before NewMBB now, do it! - MF->insert(NewMBB, NewIsland); - - // Update internal data structures to account for the newly inserted MBB. - updateForInsertedWaterBlock(NewIsland); - - // Decrement the old entry, and remove it if refcount becomes 0. - decrementCPEReferenceCount(CPI, CPEMI); - - // Now that we have an island to add the CPE to, clone the original CPE and - // add it to the island. - U.HighWaterMark = NewIsland; - U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY)) - .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); - CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); - ++NumCPEs; - - // Mark the basic block as aligned as required by the const-pool entry. - NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); - - // Increase the size of the island block to account for the new entry. - BBInfo[NewIsland->getNumber()].Size += Size; - adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); - - // Finally, change the CPI in the instruction operand to be ID. - for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) - if (UserMI->getOperand(i).isCPI()) { - UserMI->getOperand(i).setIndex(ID); - break; - } - - DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI - << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); - - return true; -} - -/// Remove a dead constant pool entry instruction. 
Update sizes and offsets of
-/// impacted basic blocks.
-void AArch64ConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
-  MachineBasicBlock *CPEBB = CPEMI->getParent();
-  unsigned Size = CPEMI->getOperand(2).getImm();
-  CPEMI->eraseFromParent();
-  BBInfo[CPEBB->getNumber()].Size -= Size;
-  // All succeeding offsets have the current size value added in, fix this.
-  if (CPEBB->empty()) {
-    BBInfo[CPEBB->getNumber()].Size = 0;
-
-    // This block no longer needs to be aligned.
-    CPEBB->setAlignment(0);
-  } else
-    // Entries are sorted by descending alignment, so realign from the front.
-    CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
-
-  adjustBBOffsetsAfter(CPEBB);
-  // An island has only one predecessor BB and one successor BB. Check if
-  // this BB's predecessor jumps directly to this BB's successor. This
-  // shouldn't happen currently.
-  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
-  // FIXME: remove the empty blocks after all the work is done?
-}
-
-/// Remove constant pool entries whose refcounts are zero.
-bool AArch64ConstantIslands::removeUnusedCPEntries() {
-  unsigned MadeChange = false;
-  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
-    std::vector<CPEntry> &CPEs = CPEntries[i];
-    for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
-      if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
-        removeDeadCPEMI(CPEs[j].CPEMI);
-        CPEs[j].CPEMI = NULL;
-        MadeChange = true;
-      }
-    }
-  }
-  return MadeChange;
-}
-
-/// Returns true if the distance between specific MI and specific BB can fit in
-/// MI's displacement field.
-bool AArch64ConstantIslands::isBBInRange(MachineInstr *MI,
-                                         MachineBasicBlock *DestBB,
-                                         unsigned OffsetBits) {
-  int64_t BrOffset = getOffsetOf(MI);
-  int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
-
-  DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
-               << " from BB#" << MI->getParent()->getNumber()
-               << " bits available=" << OffsetBits
-               << " from " << getOffsetOf(MI) << " to " << DestOffset
-               << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
-
-  return isIntN(OffsetBits, DestOffset - BrOffset);
-}
-
-/// Fix up an immediate branch whose destination is too far away to fit in its
-/// displacement field.
-bool AArch64ConstantIslands::fixupImmediateBr(ImmBranch &Br) {
-  MachineInstr *MI = Br.MI;
-  MachineBasicBlock *DestBB = 0;
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    if (MI->getOperand(i).isMBB()) {
-      DestBB = MI->getOperand(i).getMBB();
-      break;
-    }
-  }
-  assert(DestBB && "Branch with no destination BB?");
-
-  // Check to see if the DestBB is already in-range.
-  if (isBBInRange(MI, DestBB, Br.OffsetBits))
-    return false;
-
-  assert(Br.IsCond && "Only conditional branches should need fixup");
-  return fixupConditionalBr(Br);
-}
-
-/// Fix up a conditional branch whose destination is too far away to fit in its
-/// displacement field. It is converted to an inverse conditional branch + an
-/// unconditional branch to the destination.
-bool
-AArch64ConstantIslands::fixupConditionalBr(ImmBranch &Br) {
-  MachineInstr *MI = Br.MI;
-  MachineBasicBlock *MBB = MI->getParent();
-  unsigned CondBrMBBOperand = 0;
-
-  // The general idea is to add an unconditional branch to the destination and
-  // invert the conditional branch to jump over it. Complications occur around
-  // fallthrough and unreachable ends to the block.
-  //  b.lt L1
-  // =>
-  //  b.ge L2
-  //  b   L1
-  // L2:
-
-  // First we invert the conditional branch, by creating a replacement if
-  // necessary.
This if statement contains all the special handling of different - // branch types. - if (MI->getOpcode() == AArch64::Bcc) { - // The basic block is operand number 1 for Bcc - CondBrMBBOperand = 1; - - A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); - CC = A64InvertCondCode(CC); - MI->getOperand(0).setImm(CC); - } else { - MachineInstrBuilder InvertedMI; - int InvertedOpcode; - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown branch type"); - case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; - case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; - case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; - case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; - case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; - case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; - case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; - case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; - } - - InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); - for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { - InvertedMI.addOperand(MI->getOperand(i)); - if (MI->getOperand(i).isMBB()) - CondBrMBBOperand = i; - } - - MI->eraseFromParent(); - MI = Br.MI = InvertedMI; - } - - // If the branch is at the end of its MBB and that has a fall-through block, - // direct the updated conditional branch to the fall-through - // block. Otherwise, split the MBB before the next instruction. - MachineInstr *BMI = &MBB->back(); - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - - ++NumCBrFixed; - if (BMI != MI) { - if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && - BMI->getOpcode() == AArch64::Bimm) { - // Last MI in the BB is an unconditional branch. We can swap destinations: - // b.eq L1 (temporarily b.ne L1 after first change) - // b L2 - // => - // b.ne L2 - // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (isBBInRange(MI, NewDest, Br.OffsetBits)) { - DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " - << *BMI); - MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); - BMI->getOperand(0).setMBB(DestBB); - MI->getOperand(CondBrMBBOperand).setMBB(NewDest); - return true; - } - } - } - - if (NeedSplit) { - MachineBasicBlock::iterator MBBI = MI; ++MBBI; - splitBlockBeforeInstr(MBBI); - // No need for the branch to the next block. We're adding an unconditional - // branch to the destination. - int delta = TII->getInstSizeInBytes(MBB->back()); - BBInfo[MBB->getNumber()].Size -= delta; - MBB->back().eraseFromParent(); - // BBInfo[SplitBB].Offset is wrong temporarily, fixed below - } - - // After splitting and removing the unconditional branch from the original BB, - // the structure is now: - // oldbb: - // [things] - // b.invertedCC L1 - // splitbb/fallthroughbb: - // [old b L2/real continuation] - // - // We now have to change the conditional branch to point to splitbb and add an - // unconditional branch after it to L1, giving the final structure: - // oldbb: - // [things] - // b.invertedCC splitbb - // b L1 - // splitbb/fallthroughbb: - // [old b L2/real continuation] - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); - - DEBUG(dbgs() << " Insert B to BB#" - << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() - << " also invert condition and change dest. 
to BB#" - << NextBB->getNumber() << "\n"); - - // Insert a new unconditional branch and fixup the destination of the - // conditional one. Also update the ImmBranch as well as adding a new entry - // for the new branch. - BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) - .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); - MI->getOperand(CondBrMBBOperand).setMBB(NextBB); - - BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); - - // 26 bits written down in Bimm, specifying a multiple of 4. - unsigned OffsetBits = 26 + 2; - ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); - - adjustBBOffsetsAfter(MBB); - return true; -} diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index c933555..46b8221 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -89,8 +89,8 @@ public: bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); SDNode *TrySelectToMoveImm(SDNode *N); + SDNode *LowerToFPLitPool(SDNode *Node); SDNode *SelectToLitPool(SDNode *N); - SDNode *SelectToFPLitPool(SDNode *N); SDNode* Select(SDNode*); private: @@ -225,92 +225,78 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { } SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { - DebugLoc dl = Node->getDebugLoc(); + DebugLoc DL = Node->getDebugLoc(); uint64_t UnsignedVal = cast(Node)->getZExtValue(); int64_t SignedVal = cast(Node)->getSExtValue(); EVT DestType = Node->getValueType(0); + EVT PtrVT = TLI.getPointerTy(); // Since we may end up loading a 64-bit constant from a 32-bit entry the // constant in the pool may have a different type to the eventual node. - SDValue PoolEntry; - EVT LoadType; - unsigned LoadInst; + ISD::LoadExtType Extension; + EVT MemType; assert((DestType == MVT::i64 || DestType == MVT::i32) && "Only expect integer constants at the moment"); - if (DestType == MVT::i32 || UnsignedVal <= UINT32_MAX) { - // LDR w3, lbl - LoadInst = AArch64::LDRw_lit; - LoadType = MVT::i32; - - PoolEntry = CurDAG->getTargetConstantPool( - ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), UnsignedVal), - MVT::i32); + if (DestType == MVT::i32) { + Extension = ISD::NON_EXTLOAD; + MemType = MVT::i32; + } else if (UnsignedVal <= UINT32_MAX) { + Extension = ISD::ZEXTLOAD; + MemType = MVT::i32; } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { - // We can use a sign-extending 32-bit load: LDRSW x3, lbl - LoadInst = AArch64::LDRSWx_lit; - LoadType = MVT::i64; - - PoolEntry = CurDAG->getTargetConstantPool( - ConstantInt::getSigned(Type::getInt32Ty(*CurDAG->getContext()), - SignedVal), - MVT::i32); + Extension = ISD::SEXTLOAD; + MemType = MVT::i32; } else { - // Full 64-bit load needed: LDR x3, lbl - LoadInst = AArch64::LDRx_lit; - LoadType = MVT::i64; - - PoolEntry = CurDAG->getTargetConstantPool( - ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), UnsignedVal), - MVT::i64); - } - - SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl, - LoadType, MVT::Other, - PoolEntry, CurDAG->getEntryNode()); - - if (DestType != LoadType) { - // We used the implicit zero-extension of "LDR w3, lbl", tell LLVM this - // fact. 
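The choice of extension above follows a simple classification: an i32 destination loads directly, a 64-bit value that fits in 32 unsigned bits can be zero-extended from a 32-bit pool entry, one that fits in 32 signed bits can be sign-extended, and anything else needs a full 64-bit entry. A standalone sketch of that decision (the enum is an illustrative stand-in for the load-extension/memory-type pairs, not an LLVM type):

#include <cassert>
#include <cstdint>

enum class PoolLoad { NonExt32, Zext32, Sext32, NonExt64 };

// Mirrors the classification in SelectToLitPool: prefer a 32-bit pool
// entry with an extending load before falling back to a 64-bit entry.
static PoolLoad classifyConstant(uint64_t UnsignedVal, int64_t SignedVal,
                                 bool Dest64Bit) {
  if (!Dest64Bit)
    return PoolLoad::NonExt32;  // i32 destination: plain 32-bit load.
  if (UnsignedVal <= UINT32_MAX)
    return PoolLoad::Zext32;    // high 32 bits are zero: zero-extend.
  if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX)
    return PoolLoad::Sext32;    // fits when sign-extended from 32 bits.
  return PoolLoad::NonExt64;    // needs a full 64-bit constant pool entry.
}

int main() {
  assert(classifyConstant(0xffffffffULL, 0xffffffffLL, true) == PoolLoad::Zext32);
  assert(classifyConstant(uint64_t(-42), -42, true) == PoolLoad::Sext32);
  assert(classifyConstant(0x123456789ULL, 0x123456789LL, true) ==
         PoolLoad::NonExt64);
  return 0;
}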
- assert(DestType == MVT::i64 && LoadType == MVT::i32 - && "Unexpected load combination"); - - ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - MVT::i64, MVT::i32, MVT::Other, - CurDAG->getTargetConstant(0, MVT::i64), - SDValue(ResNode, 0), - CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); + Extension = ISD::NON_EXTLOAD; + MemType = MVT::i64; } - return ResNode; + Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), + MemType.getSizeInBits()), + UnsignedVal); + SDValue PoolAddr; + unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, + AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + + return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), + PoolAddr, + MachinePointerInfo::getConstantPool(), MemType, + /* isVolatile = */ false, + /* isNonTemporal = */ false, + Alignment).getNode(); } -SDNode *AArch64DAGToDAGISel::SelectToFPLitPool(SDNode *Node) { - DebugLoc dl = Node->getDebugLoc(); +SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { + DebugLoc DL = Node->getDebugLoc(); const ConstantFP *FV = cast(Node)->getConstantFPValue(); + EVT PtrVT = TLI.getPointerTy(); EVT DestType = Node->getValueType(0); - unsigned LoadInst; - switch (DestType.getSizeInBits()) { - case 32: - LoadInst = AArch64::LDRs_lit; - break; - case 64: - LoadInst = AArch64::LDRd_lit; - break; - case 128: - LoadInst = AArch64::LDRq_lit; - break; - default: llvm_unreachable("cannot select floating-point litpool"); - } - - SDValue PoolEntry = CurDAG->getTargetConstantPool(FV, DestType); - SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl, - DestType, MVT::Other, - PoolEntry, CurDAG->getEntryNode()); - - return ResNode; + unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); + SDValue PoolAddr; + + assert(TM.getCodeModel() == CodeModel::Small && + "Only small code model supported"); + PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, + AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + + return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(), + /* isVolatile = */ false, + /* isNonTemporal = */ false, + /* isInvariant = */ true, + Alignment).getNode(); } bool @@ -377,17 +363,19 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { ResNode = TrySelectToMoveImm(Node); } - // If even that fails we fall back to a lit-pool entry at the moment. Future - // tuning or restrictions like non-readable code-sections may mandate a - // sequence of MOVZ/MOVN/MOVK instructions. - if (!ResNode) { - ResNode = SelectToLitPool(Node); - } + if (ResNode) + return ResNode; + // If even that fails we fall back to a lit-pool entry at the moment. Future + // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions. + ResNode = SelectToLitPool(Node); assert(ResNode && "We need *some* way to materialise a constant"); + // We want to continue selection at this point since the litpool access + // generated used generic nodes for simplicity. 
ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - return NULL; + Node = ResNode; + break; } case ISD::ConstantFP: { if (A64Imms::isFPImm(cast(Node)->getValueAPF())) { @@ -395,9 +383,13 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { break; } - SDNode *ResNode = SelectToFPLitPool(Node); + SDNode *ResNode = LowerToFPLitPool(Node); ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - return NULL; + + // We want to continue selection at this point since the litpool access + // generated used generic nodes for simplicity. + Node = ResNode; + break; } default: break; // Let generic code handle it diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2c11547..739ca95 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1866,8 +1866,14 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, // Weak symbols can't use ADRP/ADD pair since they should evaluate to // zero when undefined. In PIC mode the GOT can take care of this, but in // absolute mode we use a constant pool load. - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - DAG.getConstantPool(GV, GN->getValueType(0)), + SDValue PoolAddr; + PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetConstantPool(GV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetConstantPool(GV, PtrVT, 0, 0, + AArch64II::MO_LO12), + DAG.getConstant(8, MVT::i32)); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, MachinePointerInfo::getConstantPool(), /*isVolatile=*/ false, /*isNonTemporal=*/ true, /*isInvariant=*/ true, 8); diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 9a7504a..b4e9e8d 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -558,50 +558,6 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, } } -unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { - const MCInstrDesc &MCID = MI.getDesc(); - const MachineBasicBlock &MBB = *MI.getParent(); - const MachineFunction &MF = *MBB.getParent(); - const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); - - if (MCID.getSize()) - return MCID.getSize(); - - if (MI.getOpcode() == AArch64::INLINEASM) - return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); - - if (MI.isLabel()) - return 0; - - switch (MI.getOpcode()) { - case TargetOpcode::BUNDLE: - return getInstBundleLength(MI); - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::PROLOG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: - return 0; - case AArch64::CONSTPOOL_ENTRY: - return MI.getOperand(2).getImm(); - case AArch64::TLSDESCCALL: - return 0; - default: - llvm_unreachable("Unknown instruction class"); - } -} - -unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { - unsigned Size = 0; - MachineBasicBlock::const_instr_iterator I = MI; - MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); - while (++I != E && I->isInsideBundle()) { - assert(!I->isBundle() && "No nested bundle!"); - Size += getInstSizeInBytes(*I); - } - return Size; -} - bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const AArch64InstrInfo &TII) { diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 8084f78..0feb5a1 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ 
b/lib/Target/AArch64/AArch64InstrInfo.h @@ -83,10 +83,6 @@ public: /// + imm % OffsetScale == 0 void getAddressConstraints(const MachineInstr &MI, int &AccessScale, int &MinOffset, int &MaxOffset) const; - - unsigned getInstSizeInBytes(const MachineInstr &MI) const; - - unsigned getInstBundleLength(const MachineInstr &MI) const; }; bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index bdef183..562a7f6 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2449,16 +2449,12 @@ class A64I_LDRlitSimple opc, bit v, RegisterClass OutReg, "ldr\t$Rt, $Imm19", patterns, NoItinerary>; let mayLoad = 1 in { - def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32, - [(set (i32 GPR32:$Rt), (load constpool:$Imm19))]>; - def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64, - [(set (i64 GPR64:$Rt), (load constpool:$Imm19))]>; + def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; + def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; } -def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32, - [(set (f32 FPR32:$Rt), (load constpool:$Imm19))]>; -def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64, - [(set (f64 FPR64:$Rt), (load constpool:$Imm19))]>; +def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>; +def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>; let mayLoad = 1 in { def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; @@ -4565,22 +4561,6 @@ def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), } //===----------------------------------------------------------------------===// -// Constant island entries -//===----------------------------------------------------------------------===// - -// The constant island pass needs to create "instructions" in the middle of the -// instruction stream to reresent its constants. - -def cpinst_operand : Operand; - -def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, - cpinst_operand:$cpidx, - i32imm:$size), []> { - let hasSideEffects = 0; - let isNotDuplicable = 1; -} - -//===----------------------------------------------------------------------===// // Miscellaneous patterns //===----------------------------------------------------------------------===// @@ -4898,6 +4878,8 @@ defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, ALIGN), (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; +defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN), + (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; // We also want to use uimm12 instructions for local variables at the moment. def tframeindex_XFORM : SDNodeXForm Date: Fri, 15 Feb 2013 12:30:38 +0000 Subject: Make helpers static. Add missing include so LLVMInitializeObjCARCOpts gets C linkage. 
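Two mechanisms are involved here. Marking a file-local helper static (or placing it in an anonymous namespace) gives it internal linkage and keeps it out of the exported symbol table; separately, a definition only receives C linkage if an extern "C" declaration is visible, which is why a missing header include can silently leave a function with a C++-mangled name. A minimal sketch with invented names:

#include <cstdio>

// Internal linkage: this helper no longer appears as an external symbol,
// so it cannot collide with same-named helpers in other translation units.
static int doubleIt(int x) { return 2 * x; }

// Normally pulled in from a C API header. Without this declaration in
// scope, the definition below would get C++ linkage and a mangled name.
extern "C" void ToyInitialize(void);

void ToyInitialize(void) {  // inherits C linkage from the declaration above
  std::printf("helper says: %d\n", doubleIt(21));
}

int main() { ToyInitialize(); return 0; }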
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175264 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/ARMException.cpp | 2 +- lib/DebugInfo/DWARFDebugFrame.cpp | 3 ++- lib/MC/MCObjectStreamer.cpp | 2 +- lib/Support/SourceMgr.cpp | 4 ++-- lib/TableGen/Record.cpp | 8 +++----- lib/Target/X86/X86RegisterInfo.cpp | 2 +- lib/Transforms/ObjCARC/ObjCARC.cpp | 1 + 7 files changed, 11 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 9310069..188047d 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -36,7 +36,7 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -cl::opt +static cl::opt EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, cl::desc("Generate ARM EHABI tables with unwinding descriptors"), cl::init(false)); diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 9da304d31..69e3a3a 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -46,7 +46,7 @@ protected: uint64_t Length; }; - +namespace { /// \brief DWARF Common Information Entry (CIE) class CIE : public FrameEntry { public: @@ -128,6 +128,7 @@ private: uint64_t AddressRange; CIE *LinkedCIE; }; +} // end anonymous namespace DWARFDebugFrame::DWARFDebugFrame() { diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 5aa4b40..fe43506 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -224,7 +224,7 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { IF->getContents().append(Code.begin(), Code.end()); } -const char *BundlingNotImplementedMsg = +static const char *BundlingNotImplementedMsg = "Aligned bundling is not implemented for this object format"; void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) { diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index fa82265..fac3cad 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -248,8 +248,8 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, std::sort(FixIts.begin(), FixIts.end()); } -void buildFixItLine(std::string &CaretLine, std::string &FixItLine, - ArrayRef FixIts, ArrayRef SourceLine) { +static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, + ArrayRef FixIts, ArrayRef SourceLine){ if (FixIts.empty()) return; diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index fcee93a..9ad2053 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1525,11 +1525,9 @@ Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const { return const_cast(this); } -void ProfileDagInit(FoldingSetNodeID &ID, - Init *V, - const std::string &VN, - ArrayRef ArgRange, - ArrayRef NameRange) { +static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, const std::string &VN, + ArrayRef ArgRange, + ArrayRef NameRange) { ID.AddPointer(V); ID.AddString(VN); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 02da140..401eefb 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,7 +50,7 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); -cl::opt +static cl::opt EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); diff --git 
a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp index f798063..53a31b0 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -15,6 +15,7 @@ #include "ObjCARC.h" #include "llvm-c/Core.h" +#include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -- cgit v1.1 From 74ecd214e267c9b24d824a6403d2f89aff1a7697 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Fri, 15 Feb 2013 12:46:06 +0000 Subject: [asan] support long double on 64-bit. See https://code.google.com/p/address-sanitizer/issues/detail?id=151 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175266 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b97e342..5769e94 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -639,7 +639,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) { Type *OrigTy = cast(OrigPtrTy)->getElementType(); assert(OrigTy->isSized()); - uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); + uint32_t TypeSize = TD->getTypeAllocSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { -- cgit v1.1 From 9fa05f98e0e8410bc8c5e4000e0d47880f8b37c4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 15 Feb 2013 14:08:43 +0000 Subject: Don't make assumptions about the mangling of static functions in extern "C" blocks. We still don't have consensus if we should try to change clang or the standard, but llvm should work with compilers that implement the current standard and mangle those functions. 
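The pitfall can be sketched with invented names: a static function inside an extern "C" block has internal linkage, and a conforming compiler is allowed to emit a mangled symbol for it, so hand-written assembly that names the symbol directly may fail to link. Giving the callback external linkage with a distinctive prefix makes the symbol predictable:

#include <cstdint>

extern "C" {
// Problematic pattern: 'static' gives internal linkage here, and the
// standard does not promise an unmangled symbol, so an asm reference such
// as "call toyCallback" may not resolve under every conforming compiler:
//   static void toyCallback(intptr_t *StackPtr, intptr_t RetAddr);

// Portable pattern: external linkage plus extern "C" guarantees the plain
// C symbol name that inline assembly can reference.
void ToyCompilationCallback(intptr_t *StackPtr, intptr_t RetAddr);
}

void ToyCompilationCallback(intptr_t *StackPtr, intptr_t RetAddr) {
  // A real JIT callback would rewrite the return address so execution
  // resumes in freshly compiled code; this stub only keeps the sketch
  // self-contained.
  (void)StackPtr;
  (void)RetAddr;
}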
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 14 +++++++------- lib/Target/X86/X86JITInfo.cpp | 30 ++++++++++-------------------- 2 files changed, 17 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 851de17..bf2e303 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -115,7 +115,7 @@ asm( "lwz r2, 208(r1)\n" // stub's frame "lwz r4, 8(r2)\n" // stub's lr "li r5, 0\n" // 0 == 32 bit - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" "mtctr r3\n" // Restore all int arg registers "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" @@ -178,7 +178,7 @@ asm( "lwz 5, 104(1)\n" // stub's frame "lwz 4, 4(5)\n" // stub's lr "li 5, 0\n" // 0 == 32 bit - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "mtctr 3\n" // Restore all int arg registers "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" @@ -259,10 +259,10 @@ asm( "ld 4, 16(5)\n" // stub's lr "li 5, 1\n" // 1 == 64 bit #ifdef __ELF__ - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "nop\n" #else - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" #endif "mtctr 3\n" // Restore all int arg registers @@ -292,9 +292,9 @@ void PPC64CompilationCallback() { #endif extern "C" { -static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +void* LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0a4acce..3ae0630 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -79,7 +79,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; # define CFI(x) #endif -// Provide a wrapper for X86CompilationCallback2 that saves non-traditional +// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional // callee saved registers, for the fastcc calling convention. 
extern "C" { #if defined(X86_64_JIT) @@ -131,12 +131,12 @@ extern "C" { "subq $32, %rsp\n" "movq %rbp, %rcx\n" // Pass prev frame and return address "movq 8(%rbp), %rdx\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "addq $32, %rsp\n" #else "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" #endif // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" @@ -213,7 +213,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "movl %ebp, %esp\n" // Restore ESP CFI(".cfi_def_cfa_register %esp\n") "subl $12, %esp\n" @@ -269,7 +269,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "addl $16, %esp\n" "movaps 48(%esp), %xmm3\n" CFI(".cfi_restore %xmm3\n") @@ -300,10 +300,7 @@ extern "C" { SIZE(X86CompilationCallback_SSE) ); # else - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly - static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); _declspec(naked) void X86CompilationCallback(void) { __asm { @@ -317,7 +314,7 @@ extern "C" { mov eax, dword ptr [ebp+4] mov dword ptr [esp+4], eax mov dword ptr [esp], ebp - call X86CompilationCallback2 + call LLVMX86CompilationCallback2 mov esp, ebp sub esp, 12 pop ecx @@ -337,19 +334,12 @@ extern "C" { #endif } -/// X86CompilationCallback2 - This is the target-specific function invoked by the +/// This is the target-specific function invoked by the /// function stub when we did not know the real target of a call. This function /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -#if !(defined (X86_64_JIT) && defined(_MSC_VER)) - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly -static -#endif -void LLVM_ATTRIBUTE_USED -X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { +void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; // We are reading raw stack data here. Tell MemorySanitizer that it is // sufficiently initialized. @@ -520,7 +510,7 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, // This used to use 0xCD, but that value is used by JITMemoryManager to // initialize the buffer with garbage, which means it may follow a - // noreturn function call, confusing X86CompilationCallback2. PR 4929. + // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! return Result; } -- cgit v1.1 From 8a8a2dcae054a7b4dfea360b9b88e6be53fda40f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 15 Feb 2013 14:15:59 +0000 Subject: Give these callbacks hidden visibility. 
It is better to not export them more than we need to and some ELF linkers complain about directly accessing symbols with default visibility. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 7 ++++--- lib/Target/X86/X86JITInfo.cpp | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index bf2e303..79d0c1f 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -292,9 +292,10 @@ void PPC64CompilationCallback() { #endif extern "C" { -void* LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +void* LLVM_LIBRARY_VISIBILITY +LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 3ae0630..aeb05cc 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -339,7 +339,8 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { +void LLVM_LIBRARY_VISIBILITY LLVMX86CompilationCallback2(intptr_t *StackPtr, + intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; // We are reading raw stack data here. Tell MemorySanitizer that it is // sufficiently initialized. -- cgit v1.1 From 85d2760c8e1d36657ae4d86a6aeee03b3a723d9c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 15 Feb 2013 14:32:20 +0000 Subject: AArch64: add branch fixup pass. This is essentially a stripped-down version of the ConstandIslands pass (which always had these two functions), providing just the features necessary for correctness. In particular there needs to be a way to resolve the situation where a conditional branch's destination block ends up out of range. This issue crops up when self-hosting for AArch64. 
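For scale, the displacement limits involved can be checked with a small standalone sketch. It assumes the encodings handled by the pass below: 14 offset bits for TBZ/TBNZ, 19 for B.cond/CBZ/CBNZ and 26 for B, each counting signed multiples of the 4-byte instruction size:

#include <cassert>
#include <cstdint>

// A branch with N encoded offset bits reaches N + 2 bits of signed byte
// displacement, because the offset is scaled by the 4-byte instruction size.
static bool fitsInBranch(int64_t ByteOffset, unsigned EncodedBits) {
  unsigned Bits = EncodedBits + 2;
  int64_t Lo = -(INT64_C(1) << (Bits - 1));
  int64_t Hi = (INT64_C(1) << (Bits - 1)) - 1;
  return ByteOffset >= Lo && ByteOffset <= Hi;
}

int main() {
  assert(!fitsInBranch(1 << 20, 19));      // B.cond/CBZ: +1MiB is just out.
  assert(fitsInBranch((1 << 20) - 4, 19)); // One instruction inside the limit.
  assert(fitsInBranch(-(1 << 15), 14));    // TBZ/TBNZ: -32KiB is reachable.
  assert(!fitsInBranch(1 << 27, 26));      // Even B tops out at +/-128MiB.
  return 0;
}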
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64.h | 2 + lib/Target/AArch64/AArch64BranchFixupPass.cpp | 600 ++++++++++++++++++++++++++ lib/Target/AArch64/AArch64InstrInfo.cpp | 42 ++ lib/Target/AArch64/AArch64InstrInfo.h | 6 + lib/Target/AArch64/AArch64TargetMachine.cpp | 1 + lib/Target/AArch64/CMakeLists.txt | 1 + 6 files changed, 652 insertions(+) create mode 100644 lib/Target/AArch64/AArch64BranchFixupPass.cpp (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index a97aae7..4de4faa 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -31,6 +31,8 @@ FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); +FunctionPass *createAArch64BranchFixupPass(); + void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AArch64AsmPrinter &AP); diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp new file mode 100644 index 0000000..71233ba --- /dev/null +++ b/lib/Target/AArch64/AArch64BranchFixupPass.cpp @@ -0,0 +1,600 @@ +//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that fixes AArch64 branches which have ended up out +// of range for their immediate operands. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-branch-fixup" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); + +/// Return the worst case padding that could result from unknown offset bits. +/// This does not include alignment padding caused by known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +namespace { + /// Due to limited PC-relative displacements, conditional branches to distant + /// blocks may need converting into an unconditional equivalent. For example: + /// tbz w1, #0, far_away + /// becomes + /// tbnz w1, #0, skip + /// b far_away + /// skip: + class AArch64BranchFixup : public MachineFunctionPass { + /// Information about the offset and size of a single basic block. + struct BasicBlockInfo { + /// Distance from the beginning of the function to the beginning of this + /// basic block. + /// + /// Offsets are computed assuming worst case padding before an aligned + /// block. This means that subtracting basic block offsets always gives a + /// conservative estimate of the real distance which may be smaller. 
+ /// + /// Because worst case padding is used, the computed offset of an aligned + /// block may not actually be aligned. + unsigned Offset; + + /// Size of the basic block in bytes. If the block contains inline + /// assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// The number of low bits in Offset that are known to be exact. The + /// remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// When non-zero, the block contains instructions (inline asm) of unknown + /// size. The real size may be smaller than Size bytes by a multiple of 1 + /// << Unalign. + uint8_t Unalign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + unsigned Bits = Unalign ? Unalign : KnownBits; + // If the block size isn't a multiple of the known bits, assume the + // worst case padding. + if (Size & ((1u << Bits) - 1)) + Bits = CountTrailingZeros_32(Size); + return Bits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + if (!LogAlign) + return PO; + // Add alignment padding from the terminator. + return PO + UnknownPadding(LogAlign, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. + unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(LogAlign, internalKnownBits()); + } + }; + + std::vector BBInfo; + + /// One per immediate branch, keeping the machine instruction pointer, + /// conditional or unconditional, the max displacement, and (if IsCond is + /// true) the corresponding inverted branch opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned OffsetBits : 31; + bool IsCond : 1; + ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) + : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} + }; + + /// Keep track of all the immediate branch instructions. 
+ /// + std::vector ImmBranches; + + MachineFunction *MF; + const AArch64InstrInfo *TII; + public: + static char ID; + AArch64BranchFixup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "AArch64 branch fixup pass"; + } + + private: + void initializeFunctionInfo(); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, + unsigned OffsetBits); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + void dumpBBs(); + void verify(); + }; + char AArch64BranchFixup::ID = 0; +} + +/// check BBOffsets +void AArch64BranchFixup::verify() { +#ifndef NDEBUG + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + unsigned MBBId = MBB->getNumber(); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); + } +#endif +} + +/// print block size and offset information - debugging +void AArch64BranchFixup::dumpBBs() { + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); +} + +/// Returns an instance of the branch fixup pass. +FunctionPass *llvm::createAArch64BranchFixupPass() { + return new AArch64BranchFixup(); +} + +bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + DEBUG(dbgs() << "***** AArch64BranchFixup ******"); + TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); + + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + MF->RenumberBlocks(); + + // Do the initial scan of the function, building up information about the + // sizes of each block and location of each immediate branch. + initializeFunctionInfo(); + + // Iteratively fix up branches until there is no change. + unsigned NoBRIters = 0; + bool MadeChange = false; + while (true) { + DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); + bool BRChange = false; + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) + BRChange |= fixupImmediateBr(ImmBranches[i]); + if (BRChange && ++NoBRIters > 30) + report_fatal_error("Branch Fix Up pass failed to converge!"); + DEBUG(dumpBBs()); + + if (!BRChange) + break; + MadeChange = true; + } + + // After a while, this might be made debug-only, but it is not expensive. + verify(); + + DEBUG(dbgs() << '\n'; dumpBBs()); + + BBInfo.clear(); + ImmBranches.clear(); + + return MadeChange; +} + +/// Return true if the specified basic block can fallthrough into the block +/// immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + // Can't fall off end of function. 
+ if (llvm::next(MBBI) == MBB->getParent()->end()) + return false; + + MachineBasicBlock *NextBB = llvm::next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// Do the initial scan of the function, building up information about the sizes +/// of each block, and each immediate branch. +void AArch64BranchFixup::initializeFunctionInfo() { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + computeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + adjustBBOffsetsAfter(MF->begin()); + + // Now go back through the instructions and build up our data structures. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + int Opc = I->getOpcode(); + if (I->isBranch()) { + bool IsCond = false; + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. + unsigned Bits = 0; + switch (Opc) { + default: + continue; // Ignore other JT branches + case AArch64::TBZxii: + case AArch64::TBZwii: + case AArch64::TBNZxii: + case AArch64::TBNZwii: + IsCond = true; + Bits = 14 + 2; + break; + case AArch64::Bcc: + case AArch64::CBZx: + case AArch64::CBZw: + case AArch64::CBNZx: + case AArch64::CBNZw: + IsCond = true; + Bits = 19 + 2; + break; + case AArch64::Bimm: + Bits = 26 + 2; + break; + } + + // Record this immediate branch. + ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); + } + } + } +} + +/// Compute the size and some alignment information for MBB. This function +/// updates BBInfo directly. +void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->getInstSizeInBytes(*I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = 2; + } +} + +/// Return the current offset of the specified machine instruction from the +/// start of the function. This offset changes as stuff is moved around inside +/// the function. +unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BBInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. 
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + return Offset; +} + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock * +AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF->insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); + ++NumSplit; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + NewBB->transferSuccessors(OrigBB); + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + MF->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + computeBlockSize(OrigBB); + + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + computeBlockSize(NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBBOffsetsAfter(OrigBB); + + return NewBB; +} + +void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { + unsigned BBNum = BB->getNumber(); + for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; + } +} + +/// Returns true if the distance between specific MI and specific BB can fit in +/// MI's displacement field. 
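+/// The encoded offsets count 4-byte words, so a branch with B offset bits
+/// reaches roughly +/-2^(B+1) bytes: about +/-32KiB for TBZ/TBNZ (14 bits),
+/// +/-1MiB for Bcc/CBZ/CBNZ (19 bits), and +/-128MiB for B (26 bits).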
+bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, + MachineBasicBlock *DestBB, + unsigned OffsetBits) { + int64_t BrOffset = getOffsetOf(MI); + int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; + + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " bits available=" << OffsetBits + << " from " << getOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + + return isIntN(OffsetBits, DestOffset - BrOffset); +} + +/// Fix up an immediate branch whose destination is too far away to fit in its +/// displacement field. +bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isMBB()) { + DestBB = MI->getOperand(i).getMBB(); + break; + } + } + assert(DestBB && "Branch with no destination BB?"); + + // Check to see if the DestBB is already in-range. + if (isBBInRange(MI, DestBB, Br.OffsetBits)) + return false; + + assert(Br.IsCond && "Only conditional branches should need fixup"); + return fixupConditionalBr(Br); +} + +/// Fix up a conditional branch whose destination is too far away to fit in its +/// displacement field. It is converted to an inverse conditional branch + an +/// unconditional branch to the destination. +bool +AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *MBB = MI->getParent(); + unsigned CondBrMBBOperand = 0; + + // The general idea is to add an unconditional branch to the destination and + // invert the conditional branch to jump over it. Complications occur around + // fallthrough and unreachable ends to the block. + // b.lt L1 + // => + // b.ge L2 + // b L1 + // L2: + + // First we invert the conditional branch, by creating a replacement if + // necessary. This if statement contains all the special handling of different + // branch types. + if (MI->getOpcode() == AArch64::Bcc) { + // The basic block is operand number 1 for Bcc + CondBrMBBOperand = 1; + + A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); + CC = A64InvertCondCode(CC); + MI->getOperand(0).setImm(CC); + } else { + MachineInstrBuilder InvertedMI; + int InvertedOpcode; + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown branch type"); + case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; + case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; + case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; + case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; + case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; + case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; + case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; + case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; + } + + InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); + for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { + InvertedMI.addOperand(MI->getOperand(i)); + if (MI->getOperand(i).isMBB()) + CondBrMBBOperand = i; + } + + MI->eraseFromParent(); + MI = Br.MI = InvertedMI; + } + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through + // block. Otherwise, split the MBB before the next instruction. 
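+  // There is one shortcut: if the block already ends in an in-range
+  // unconditional branch, we can simply swap the two destinations instead of
+  // splitting; that case is handled first below.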
+ MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + ++NumCBrFixed; + if (BMI != MI) { + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == AArch64::Bimm) { + // Last MI in the BB is an unconditional branch. We can swap destinations: + // b.eq L1 (temporarily b.ne L1 after first change) + // b L2 + // => + // b.ne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (isBBInRange(MI, NewDest, Br.OffsetBits)) { + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " + << *BMI); + MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(CondBrMBBOperand).setMBB(NewDest); + return true; + } + } + } + + if (NeedSplit) { + MachineBasicBlock::iterator MBBI = MI; ++MBBI; + splitBlockBeforeInstr(MBBI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->getInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size -= delta; + MBB->back().eraseFromParent(); + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below + } + + // After splitting and removing the unconditional branch from the original BB, + // the structure is now: + // oldbb: + // [things] + // b.invertedCC L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + // + // We now have to change the conditional branch to point to splitbb and add an + // unconditional branch after it to L1, giving the final structure: + // oldbb: + // [things] + // b.invertedCC splitbb + // b L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" + << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"); + + // Insert a new unconditional branch and fixup the destination of the + // conditional one. Also update the ImmBranch as well as adding a new entry + // for the new branch. + BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) + .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); + MI->getOperand(CondBrMBBOperand).setMBB(NextBB); + + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); + + // 26 bits written down in Bimm, specifying a multiple of 4. 
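+  // (i.e. an effective signed 28-bit byte offset, roughly +/-128MiB.)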
+ unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); + + adjustBBOffsetsAfter(MBB); + return true; +} diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index b4e9e8d..7b93463 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -558,6 +558,48 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, } } +unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + const MCInstrDesc &MCID = MI.getDesc(); + const MachineBasicBlock &MBB = *MI.getParent(); + const MachineFunction &MF = *MBB.getParent(); + const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); + + if (MCID.getSize()) + return MCID.getSize(); + + if (MI.getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); + + if (MI.isLabel()) + return 0; + + switch (MI.getOpcode()) { + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: + return 0; + case AArch64::TLSDESCCALL: + return 0; + default: + llvm_unreachable("Unknown instruction class"); + } +} + +unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += getInstSizeInBytes(*I); + } + return Size; +} + bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const AArch64InstrInfo &TII) { diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 0feb5a1..22a2ab4 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -83,6 +83,12 @@ public: /// + imm % OffsetScale == 0 void getAddressConstraints(const MachineInstr &MI, int &AccessScale, int &MinOffset, int &MaxOffset) const; + + + unsigned getInstSizeInBytes(const MachineInstr &MI) const; + + unsigned getInstBundleLength(const MachineInstr &MI) const; + }; bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 33739cf..df599d5 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -66,6 +66,7 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { bool AArch64PassConfig::addPreEmitPass() { addPass(&UnpackMachineBundlesID); + addPass(createAArch64BranchFixupPass()); return true; } diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index 06720a8..8164d6f 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -14,6 +14,7 @@ add_public_tablegen_target(AArch64CommonTableGen) add_llvm_target(AArch64CodeGen AArch64AsmPrinter.cpp + AArch64BranchFixupPass.cpp AArch64FrameLowering.cpp AArch64ISelDAGToDAG.cpp AArch64ISelLowering.cpp -- cgit v1.1 From 7c5c9b39c91d5f53284011c0ddbf458d553740da Mon Sep 17 00:00:00 2001 From: "Arnaud A. 
de Grandmaison" Date: Fri, 15 Feb 2013 14:35:47 +0000 Subject: Teach InstCombine to work with smaller legal types in icmp (shl %v, C1), C2 It enables to work with a smaller constant, which is target friendly for those which can compare to immediates. It also avoids inserting a shift in favor of a trunc, which can be free on some targets. This used to work until LLVM-3.1, but regressed with the 3.2 release. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175270 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 40e559e..2e7bd92 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1331,6 +1331,25 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(And->getType())); } + + // Transform (icmp pred iM (shl iM %v, N), CI) + // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // This enables to get rid of the shift in favor of a trunc which can be + // free on the target. It has the additional benefit of comparing to a + // smaller constant, which will be target friendly. + unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); + if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); + Constant *NCI = ConstantExpr::getTrunc( + ConstantExpr::getAShr(RHS, + ConstantInt::get(RHS->getType(), Amt)), + NTy); + return new ICmpInst(ICI.getPredicate(), + Builder->CreateTrunc(LHSI->getOperand(0), NTy), + ConstantExpr::getTrunc(NCI, NTy)); + } + break; } -- cgit v1.1 From ad079b274d6ecd3e36fb98099864baeeeb96ac82 Mon Sep 17 00:00:00 2001 From: "Arnaud A. de Grandmaison" Date: Fri, 15 Feb 2013 15:18:17 +0000 Subject: Fix refactoring mistake in "Teach InstCombine to work with smaller legal types..." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175273 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2e7bd92..bad46b4 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1347,7 +1347,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, NTy); return new ICmpInst(ICI.getPredicate(), Builder->CreateTrunc(LHSI->getOperand(0), NTy), - ConstantExpr::getTrunc(NCI, NTy)); + NCI); } break; -- cgit v1.1 From f64a7a83bea5f1d2ab1e71231616c6cb0487d56e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 15 Feb 2013 17:20:54 +0000 Subject: BBVectorize: Call a DAG and DAG instead of a tree Several functions and variable names used the term 'tree' to refer to what is actually a DAG. Correcting this mistake will, hopefully, prevent confusion in the future. No functionality change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175278 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 168 +++++++++++++++---------------- 1 file changed, 84 insertions(+), 84 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 4849a96..1773cff 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -298,7 +298,7 @@ namespace { DenseMap > &PairableInstUsers, DenseSet &CurrentPairs); - void pruneTreeFor( + void pruneDAGFor( DenseMap > &CandidatePairs, std::vector &PairableInsts, DenseMap > &ConnectedPairs, @@ -306,20 +306,20 @@ namespace { DenseMap > &PairableInstUserMap, DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, - DenseMap &Tree, - DenseSet &PrunedTree, ValuePair J, + DenseMap &DAG, + DenseSet &PrunedDAG, ValuePair J, bool UseCycleCheck); - void buildInitialTreeFor( + void buildInitialDAGFor( DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, DenseMap > &ConnectedPairs, DenseSet &PairableInstUsers, DenseMap &ChosenPairs, - DenseMap &Tree, ValuePair J); + DenseMap &DAG, ValuePair J); - void findBestTreeFor( + void findBestDAGFor( DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, @@ -332,7 +332,7 @@ namespace { DenseMap > &PairableInstUserMap, DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, - DenseSet &BestTree, size_t &BestMaxDepth, + DenseSet &BestDAG, size_t &BestMaxDepth, int &BestEffSize, Value *II, std::vector&JJ, bool UseCycleCheck); @@ -510,7 +510,7 @@ namespace { // InsertElement and ExtractElement have a depth factor of zero. This is // for two reasons: First, they cannot be usefully fused. Second, because // the pass generates a lot of these, they can confuse the simple metric - // used to compare the trees in the next iteration. Thus, giving them a + // used to compare the dags in the next iteration. Thus, giving them a // weight of zero allows the pass to essentially ignore them in // subsequent iterations when looking for vectorization opportunities // while still tracking dependency chains that flow through those @@ -745,8 +745,8 @@ namespace { buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); // There is now a graph of the connected pairs. For each variable, pick - // the pairing with the largest tree meeting the depth requirement on at - // least one branch. Then select all pairings that are part of that tree + // the pairing with the largest dag meeting the depth requirement on at + // least one branch. Then select all pairings that are part of that dag // and remove them from the list of available pairings and pairable // variables. @@ -920,7 +920,7 @@ namespace { // This function returns true if the two provided instructions are compatible // (meaning that they can be fused into a vector instruction). This assumes // that I has already been determined to be vectorizable and that J is not - // in the use tree of I. + // in the use dag of I. bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, bool IsSimpleLoadStore, bool NonPow2Len, int &CostSavings, int &FixedOrder) { @@ -1379,7 +1379,7 @@ namespace { } // This function builds a set of use tuples such that is in the set - // if B is in the use tree of A. If B is in the use tree of A, then B + // if B is in the use dag of A. If B is in the use dag of A, then B // depends on the output of A. 
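  // In other words, the map holds pairs <A, B> where B transitively uses A.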
void BBVectorize::buildDepMap( BasicBlock &BB, @@ -1497,19 +1497,19 @@ namespace { return false; } - // This function builds the initial tree of connected pairs with the + // This function builds the initial dag of connected pairs with the // pair J at the root. - void BBVectorize::buildInitialTreeFor( + void BBVectorize::buildInitialDAGFor( DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, std::vector &PairableInsts, DenseMap > &ConnectedPairs, DenseSet &PairableInstUsers, DenseMap &ChosenPairs, - DenseMap &Tree, ValuePair J) { - // Each of these pairs is viewed as the root node of a Tree. The Tree + DenseMap &DAG, ValuePair J) { + // Each of these pairs is viewed as the root node of a DAG. The DAG // is then walked (depth-first). As this happens, we keep track of - // the pairs that compose the Tree and the maximum depth of the Tree. + // the pairs that compose the DAG and the maximum depth of the DAG. SmallVector Q; // General depth-first post-order traversal: Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); @@ -1526,8 +1526,8 @@ namespace { ke = QQ->second.end(); k != ke; ++k) { // Make sure that this child pair is still a candidate: if (CandidatePairsSet.count(*k)) { - DenseMap::iterator C = Tree.find(*k); - if (C == Tree.end()) { + DenseMap::iterator C = DAG.find(*k); + if (C == DAG.end()) { size_t d = getDepthFactor(k->first); Q.push_back(ValuePairWithDepth(*k, QTop.second+d)); MoreChildren = true; @@ -1538,16 +1538,16 @@ namespace { } if (!MoreChildren) { - // Record the current pair as part of the Tree: - Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); + // Record the current pair as part of the DAG: + DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); Q.pop_back(); } } while (!Q.empty()); } - // Given some initial tree, prune it by removing conflicting pairs (pairs + // Given some initial dag, prune it by removing conflicting pairs (pairs // that cannot be simultaneously chosen for vectorization). - void BBVectorize::pruneTreeFor( + void BBVectorize::pruneDAGFor( DenseMap > &CandidatePairs, std::vector &PairableInsts, DenseMap > &ConnectedPairs, @@ -1555,15 +1555,15 @@ namespace { DenseMap > &PairableInstUserMap, DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, - DenseMap &Tree, - DenseSet &PrunedTree, ValuePair J, + DenseMap &DAG, + DenseSet &PrunedDAG, ValuePair J, bool UseCycleCheck) { SmallVector Q; // General depth-first post-order traversal: Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); do { ValuePairWithDepth QTop = Q.pop_back_val(); - PrunedTree.insert(QTop.first); + PrunedDAG.insert(QTop.first); // Visit each child, pruning as necessary... SmallVector BestChildren; @@ -1574,10 +1574,10 @@ namespace { for (std::vector::iterator K = QQ->second.begin(), KE = QQ->second.end(); K != KE; ++K) { - DenseMap::iterator C = Tree.find(*K); - if (C == Tree.end()) continue; + DenseMap::iterator C = DAG.find(*K); + if (C == DAG.end()) continue; - // This child is in the Tree, now we need to make sure it is the + // This child is in the DAG, now we need to make sure it is the // best of any conflicting children. There could be multiple // conflicting children, so first, determine if we're keeping // this child, then delete conflicting children as necessary. @@ -1591,7 +1591,7 @@ namespace { // fusing (a,b) we have y .. a/b .. x where y is an input // to a/b and x is an output to a/b: x and y can no longer // be legally fused. 
To prevent this condition, we must - // make sure that a child pair added to the Tree is not + // make sure that a child pair added to the DAG is not // both an input and output of an already-selected pair. // Pairing-induced dependencies can also form from more complicated @@ -1623,9 +1623,9 @@ namespace { if (!CanAdd) continue; // Even worse, this child could conflict with another node already - // selected for the Tree. If that is the case, ignore this child. - for (DenseSet::iterator T = PrunedTree.begin(), - E2 = PrunedTree.end(); T != E2; ++T) { + // selected for the DAG. If that is the case, ignore this child. + for (DenseSet::iterator T = PrunedDAG.begin(), + E2 = PrunedDAG.end(); T != E2; ++T) { if (T->first == C->first.first || T->first == C->first.second || T->second == C->first.first || @@ -1678,7 +1678,7 @@ namespace { // To check for non-trivial cycles formed by the addition of the // current pair we've formed a list of all relevant pairs, now use a // graph walk to check for a cycle. We start from the current pair and - // walk the use tree to see if we again reach the current pair. If we + // walk the use dag to see if we again reach the current pair. If we // do, then the current pair is rejected. // FIXME: It may be more efficient to use a topological-ordering @@ -1715,9 +1715,9 @@ namespace { } while (!Q.empty()); } - // This function finds the best tree of mututally-compatible connected + // This function finds the best dag of mututally-compatible connected // pairs, given the choice of root pairs as an iterator range. - void BBVectorize::findBestTreeFor( + void BBVectorize::findBestDAGFor( DenseMap > &CandidatePairs, DenseSet &CandidatePairsSet, DenseMap &CandidatePairCostSavings, @@ -1730,7 +1730,7 @@ namespace { DenseMap > &PairableInstUserMap, DenseSet &PairableInstUserPairSet, DenseMap &ChosenPairs, - DenseSet &BestTree, size_t &BestMaxDepth, + DenseSet &BestDAG, size_t &BestMaxDepth, int &BestEffSize, Value *II, std::vector&JJ, bool UseCycleCheck) { for (std::vector::iterator J = JJ.begin(), JE = JJ.end(); @@ -1741,7 +1741,7 @@ namespace { // Before going any further, make sure that this pair does not // conflict with any already-selected pairs (see comment below - // near the Tree pruning for more details). + // near the DAG pruning for more details). DenseSet ChosenPairSet; bool DoesConflict = false; for (DenseMap::iterator C = ChosenPairs.begin(), @@ -1761,39 +1761,39 @@ namespace { pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet)) continue; - DenseMap Tree; - buildInitialTreeFor(CandidatePairs, CandidatePairsSet, + DenseMap DAG; + buildInitialDAGFor(CandidatePairs, CandidatePairsSet, PairableInsts, ConnectedPairs, - PairableInstUsers, ChosenPairs, Tree, IJ); + PairableInstUsers, ChosenPairs, DAG, IJ); // Because we'll keep the child with the largest depth, the largest - // depth is still the same in the unpruned Tree. - size_t MaxDepth = Tree.lookup(IJ); + // depth is still the same in the unpruned DAG. 
+ size_t MaxDepth = DAG.lookup(IJ); - DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" + DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" << IJ.first << " <-> " << IJ.second << "} of depth " << - MaxDepth << " and size " << Tree.size() << "\n"); + MaxDepth << " and size " << DAG.size() << "\n"); - // At this point the Tree has been constructed, but, may contain + // At this point the DAG has been constructed, but, may contain // contradictory children (meaning that different children of - // some tree node may be attempting to fuse the same instruction). - // So now we walk the tree again, in the case of a conflict, + // some dag node may be attempting to fuse the same instruction). + // So now we walk the dag again, in the case of a conflict, // keep only the child with the largest depth. To break a tie, // favor the first child. - DenseSet PrunedTree; - pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, + DenseSet PrunedDAG; + pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs, PairableInstUsers, PairableInstUserMap, PairableInstUserPairSet, - ChosenPairs, Tree, PrunedTree, IJ, UseCycleCheck); + ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck); int EffSize = 0; if (TTI) { - DenseSet PrunedTreeInstrs; - for (DenseSet::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) { - PrunedTreeInstrs.insert(S->first); - PrunedTreeInstrs.insert(S->second); + DenseSet PrunedDAGInstrs; + for (DenseSet::iterator S = PrunedDAG.begin(), + E = PrunedDAG.end(); S != E; ++S) { + PrunedDAGInstrs.insert(S->first); + PrunedDAGInstrs.insert(S->second); } // The set of pairs that have already contributed to the total cost. @@ -1806,8 +1806,8 @@ namespace { // The node weights represent the cost savings associated with // fusing the pair of instructions. - for (DenseSet::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) { + for (DenseSet::iterator S = PrunedDAG.begin(), + E = PrunedDAG.end(); S != E; ++S) { if (!isa(S->first) && !isa(S->first) && !isa(S->first)) @@ -1832,7 +1832,7 @@ namespace { for (std::vector::iterator T = SS->second.begin(), TE = SS->second.end(); T != TE; ++T) { VPPair Q(*S, *T); - if (!PrunedTree.count(Q.second)) + if (!PrunedDAG.count(Q.second)) continue; DenseMap::iterator R = PairConnectionTypes.find(VPPair(Q.second, Q.first)); @@ -1854,7 +1854,7 @@ namespace { for (std::vector::iterator T = SS->second.begin(), TE = SS->second.end(); T != TE; ++T) { VPPair Q(*S, *T); - if (!PrunedTree.count(Q.second)) + if (!PrunedDAG.count(Q.second)) continue; DenseMap::iterator R = PairConnectionTypes.find(VPPair(Q.second, Q.first)); @@ -1906,7 +1906,7 @@ namespace { } if (isa(*I)) continue; - if (PrunedTreeInstrs.count(*I)) + if (PrunedDAGInstrs.count(*I)) continue; NeedsExtraction = true; break; @@ -1938,7 +1938,7 @@ namespace { } if (isa(*I)) continue; - if (PrunedTreeInstrs.count(*I)) + if (PrunedDAGInstrs.count(*I)) continue; NeedsExtraction = true; break; @@ -1980,7 +1980,7 @@ namespace { ValuePair VPR = ValuePair(O2, O1); // Internal edges are not handled here. 
- if (PrunedTree.count(VP) || PrunedTree.count(VPR)) + if (PrunedDAG.count(VP) || PrunedDAG.count(VPR)) continue; Type *Ty1 = O1->getType(), @@ -2074,27 +2074,27 @@ namespace { if (!HasNontrivialInsts) { DEBUG(if (DebugPairSelection) dbgs() << - "\tNo non-trivial instructions in tree;" + "\tNo non-trivial instructions in DAG;" " override to zero effective size\n"); EffSize = 0; } } else { - for (DenseSet::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) + for (DenseSet::iterator S = PrunedDAG.begin(), + E = PrunedDAG.end(); S != E; ++S) EffSize += (int) getDepthFactor(S->first); } DEBUG(if (DebugPairSelection) - dbgs() << "BBV: found pruned Tree for pair {" + dbgs() << "BBV: found pruned DAG for pair {" << IJ.first << " <-> " << IJ.second << "} of depth " << - MaxDepth << " and size " << PrunedTree.size() << + MaxDepth << " and size " << PrunedDAG.size() << " (effective size: " << EffSize << ")\n"); if (((TTI && !UseChainDepthWithTI) || MaxDepth >= Config.ReqChainDepth) && EffSize > 0 && EffSize > BestEffSize) { BestMaxDepth = MaxDepth; BestEffSize = EffSize; - BestTree = PrunedTree; + BestDAG = PrunedDAG; } } } @@ -2133,37 +2133,37 @@ namespace { std::vector &JJ = CandidatePairs[*I]; - // The best pair to choose and its tree: + // The best pair to choose and its dag: size_t BestMaxDepth = 0; int BestEffSize = 0; - DenseSet BestTree; - findBestTreeFor(CandidatePairs, CandidatePairsSet, + DenseSet BestDAG; + findBestDAGFor(CandidatePairs, CandidatePairsSet, CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, PairableInstUsers, PairableInstUserMap, PairableInstUserPairSet, ChosenPairs, - BestTree, BestMaxDepth, BestEffSize, *I, JJ, + BestDAG, BestMaxDepth, BestEffSize, *I, JJ, UseCycleCheck); - if (BestTree.empty()) + if (BestDAG.empty()) continue; - // A tree has been chosen (or not) at this point. If no tree was + // A dag has been chosen (or not) at this point. If no dag was // chosen, then this instruction, I, cannot be paired (and is no longer // considered). - DEBUG(dbgs() << "BBV: selected pairs in the best tree for: " + DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: " << *cast(*I) << "\n"); - for (DenseSet::iterator S = BestTree.begin(), - SE2 = BestTree.end(); S != SE2; ++S) { - // Insert the members of this tree into the list of chosen pairs. + for (DenseSet::iterator S = BestDAG.begin(), + SE2 = BestDAG.end(); S != SE2; ++S) { + // Insert the members of this dag into the list of chosen pairs. ChosenPairs.insert(ValuePair(S->first, S->second)); DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << *S->second << "\n"); - // Remove all candidate pairs that have values in the chosen tree. + // Remove all candidate pairs that have values in the chosen dag. std::vector &KK = CandidatePairs[S->first], &LL = CandidatePairs2[S->second], &MM = CandidatePairs[S->second], @@ -2868,7 +2868,7 @@ namespace { // are chosen for vectorization, we can end up in a situation where the // aliasing analysis starts returning different query results as the // process of fusing instruction pairs continues. Because the algorithm - // relies on finding the same use trees here as were found earlier, we'll + // relies on finding the same use dags here as were found earlier, we'll // need to precompute the necessary aliasing information here and then // manually update it during the fusion process. 
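  // (Otherwise, fusing an early pair could change alias-query answers for
  // pairs chosen later, invalidating the pairing decisions already made.)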
void BBVectorize::collectLoadMoveSet(BasicBlock &BB, @@ -3074,9 +3074,9 @@ namespace { Instruction *K1 = 0, *K2 = 0; replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); - // The use tree of the first original instruction must be moved to after - // the location of the second instruction. The entire use tree of the - // first instruction is disjoint from the input tree of the second + // The use dag of the first original instruction must be moved to after + // the location of the second instruction. The entire use dag of the + // first instruction is disjoint from the input dag of the second // (by definition), and so commutes with it. moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J); -- cgit v1.1 From 55a98b00c1a383309ade29fe2eb329c4c8d6a9d3 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Fri, 15 Feb 2013 17:52:07 +0000 Subject: Hexagon: Set appropriate TSFlags to the loads/stores with global address to support constant extension. This patch doesn't introduce any functionality changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175280 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfoV4.td | 58 ++++++++++++++------------------ 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index e76d716..e1b2f88 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -1366,7 +1366,8 @@ def STriw_shl_V4 : STInst<(outs), // memd(#global)=Rtt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in def STd_GP_V4 : STInst2<(outs), (ins globaladdress:$global, DoubleRegs:$src), "memd(#$global) = $src", @@ -1374,7 +1375,8 @@ def STd_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in +let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STd_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if ($src1) memd(##$global) = $src2", @@ -1382,7 +1384,6 @@ def STd_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in def STd_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if (!$src1) memd(##$global) = $src2", @@ -1390,7 +1391,6 @@ def STd_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in def STd_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if ($src1.new) memd(##$global) = $src2", @@ -1398,15 +1398,16 @@ def STd_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in def STd_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if (!$src1.new) memd(##$global) = $src2", []>, Requires<[HasV4T]>; +} // memb(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, +validSubTargets = HasV4SubT in def STb_GP_V4 : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memb(#$global) = $src", @@ -1414,7 +1415,8 @@ def STb_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) 
memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STb_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memb(##$global) = $src2", @@ -1422,7 +1424,6 @@ def STb_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STb_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memb(##$global) = $src2", @@ -1430,7 +1431,6 @@ def STb_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STb_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memb(##$global) = $src2", @@ -1438,15 +1438,16 @@ def STb_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STb_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memb(##$global) = $src2", []>, Requires<[HasV4T]>; +} // memh(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, +validSubTargets = HasV4SubT in def STh_GP_V4 : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memh(#$global) = $src", @@ -1454,7 +1455,8 @@ def STh_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STh_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memh(##$global) = $src2", @@ -1462,7 +1464,6 @@ def STh_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STh_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memh(##$global) = $src2", @@ -1470,7 +1471,6 @@ def STh_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STh_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memh(##$global) = $src2", @@ -1478,15 +1478,16 @@ def STh_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STh_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memh(##$global) = $src2", []>, Requires<[HasV4T]>; +} // memw(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, +validSubTargets = HasV4SubT in def STw_GP_V4 : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memw(#$global) = $src", @@ -1494,7 +1495,8 @@ def STw_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STw_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), 
"if ($src1) memw(##$global) = $src2", @@ -1502,7 +1504,6 @@ def STw_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STw_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memw(##$global) = $src2", @@ -1510,7 +1511,6 @@ def STw_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STw_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memw(##$global) = $src2", @@ -1518,12 +1518,12 @@ def STw_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STw_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memw(##$global) = $src2", []>, Requires<[HasV4T]>; +} // 64 bit atomic store def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), @@ -1806,7 +1806,8 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), // memw(Rx++Mu:brev)=Nt.new // memw(gp+#u16:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, +validSubTargets = HasV4SubT in def STw_GP_nv_V4 : NVInst_V4<(outs), (ins globaladdress:$global, IntRegs:$src), "memw(#$global) = $src.new", @@ -1814,7 +1815,8 @@ def STw_GP_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memb(##$global) = $src2.new", @@ -1822,7 +1824,6 @@ def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memb(##$global) = $src2.new", @@ -1830,7 +1831,6 @@ def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memb(##$global) = $src2.new", @@ -1838,7 +1838,6 @@ def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memb(##$global) = $src2.new", @@ -1846,7 +1845,6 @@ def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memh(##$global) = $src2.new", @@ -1854,7 +1852,6 @@ def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memh(##$global) = $src2.new", @@ -1862,7 +1859,6 @@ def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let mayStore = 1, 
neverHasSideEffects = 1 in def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memh(##$global) = $src2.new", @@ -1870,7 +1866,6 @@ def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memh(##$global) = $src2.new", @@ -1878,7 +1873,6 @@ def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memw(##$global) = $src2.new", @@ -1886,7 +1880,6 @@ def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memw(##$global) = $src2.new", @@ -1894,7 +1887,6 @@ def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memw(##$global) = $src2.new", @@ -1902,12 +1894,12 @@ def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memw(##$global) = $src2.new", []>, Requires<[HasV4T]>; +} //===----------------------------------------------------------------------===// // NV/ST - -- cgit v1.1 From 86cdbc9c29a572d422815f55fd89ff7510d1e3e8 Mon Sep 17 00:00:00 2001 From: Paul Redmond Date: Fri, 15 Feb 2013 18:45:18 +0000 Subject: enable SDISel sincos optimization for GNU environments - add sincos to runtime library if target triple environment is GNU - added canCombineSinCosLibcall() which checks that sincos is in the RTL and if the environment is GNU then unsafe fpmath is enabled (required to preserve errno) - extended sincos-opt lit test Reviewed by: Hal Finkel git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175283 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 17 ++++++++++++++++- lib/CodeGen/TargetLoweringBase.cpp | 25 +++++++++++++++++-------- 2 files changed, 33 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 6a3e03b..4a0176b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -2111,6 +2112,20 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != 0; } +/// canCombineSinCosLibcall - Return true if sincos libcall is available and +/// can be used to combine sin and cos. 
+static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, + const TargetMachine &TM) { + if (!isSinCosLibcallAvailable(Node, TLI)) + return false; + // GNU sin/cos functions set errno while sincos does not. Therefore + // combining sin and cos is only safe if unsafe-fpmath is enabled. + bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; + if (isGNU && !TM.Options.UnsafeFPMath) + return false; + return true; +} + /// useSinCos - Only issue sincos libcall if both sin and cos are /// needed. static bool useSinCos(SDNode *Node) { @@ -3149,7 +3164,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || - isSinCosLibcallAvailable(Node, TLI)) + canCombineSinCosLibcall(Node, TLI, TM)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 10aa9d6..f2329dc 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -34,7 +35,7 @@ using namespace llvm; /// InitLibcallNames - Set default libcall names. /// -static void InitLibcallNames(const char **Names) { +static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SHL_I16] = "__ashlhi3"; Names[RTLIB::SHL_I32] = "__ashlsi3"; Names[RTLIB::SHL_I64] = "__ashldi3"; @@ -341,12 +342,20 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; - // These are generally not available. - Names[RTLIB::SINCOS_F32] = 0; - Names[RTLIB::SINCOS_F64] = 0; - Names[RTLIB::SINCOS_F80] = 0; - Names[RTLIB::SINCOS_F128] = 0; - Names[RTLIB::SINCOS_PPCF128] = 0; + if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + Names[RTLIB::SINCOS_F32] = "sincosf"; + Names[RTLIB::SINCOS_F64] = "sincos"; + Names[RTLIB::SINCOS_F80] = "sincosl"; + Names[RTLIB::SINCOS_F128] = "sincosl"; + Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + } else { + // These are generally not available. + Names[RTLIB::SINCOS_F32] = 0; + Names[RTLIB::SINCOS_F64] = 0; + Names[RTLIB::SINCOS_F80] = 0; + Names[RTLIB::SINCOS_F128] = 0; + Names[RTLIB::SINCOS_PPCF128] = 0; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -726,7 +735,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, SupportJumpTables = true; MinimumJumpTableEntries = 4; - InitLibcallNames(LibcallRoutineNames); + InitLibcallNames(LibcallRoutineNames, TM); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); } -- cgit v1.1 From 75234372460b720a444ac7b3b0a0bbfb948be1e1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 15 Feb 2013 20:37:21 +0000 Subject: AsmParser: Reformat the MS asm parser to reduce nesting. While there postpone register name printing after uniquing. No intended functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175292 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 171 +++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 85 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index aa07ecb..bfe3612 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3998,7 +3998,8 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { return false; } -bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t Len) { +bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, + size_t Len) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); if (ParseExpression(Value)) @@ -4026,15 +4027,16 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { if (!isPowerOf2_64(IntValue)) return Error(ExprLoc, "literal value not a power of two greater then zero"); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue))); + Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5, + Log2_64(IntValue))); return false; } // We are comparing pointers, but the pointers are relative to a single string. // Thus, this should always be deterministic. -static int RewritesSort (const void *A, const void *B) { - const AsmRewrite *AsmRewriteA = static_cast(A); - const AsmRewrite *AsmRewriteB = static_cast(B); +static int RewritesSort(const void *A, const void *B) { + const AsmRewrite *AsmRewriteA = static_cast(A); + const AsmRewrite *AsmRewriteB = static_cast(B); if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer()) return -1; if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) @@ -4042,23 +4044,24 @@ static int RewritesSort (const void *A, const void *B) { return 0; } -bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, - unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl > &OpDecls, - SmallVectorImpl &Constraints, - SmallVectorImpl &Clobbers, - const MCInstrInfo *MII, - const MCInstPrinter *IP, - MCAsmParserSemaCallback &SI) { +bool +AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, + unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl > &OpDecls, + SmallVectorImpl &Constraints, + SmallVectorImpl &Clobbers, + const MCInstrInfo *MII, + const MCInstPrinter *IP, + MCAsmParserSemaCallback &SI) { SmallVector InputDecls; SmallVector OutputDecls; SmallVector InputDeclsAddressOf; SmallVector OutputDeclsAddressOf; SmallVector InputConstraints; SmallVector OutputConstraints; - std::set ClobberRegs; + SmallVector ClobberRegs; - SmallVector AsmStrRewrites; + SmallVector AsmStrRewrites; // Prime the lexer. Lex(); @@ -4074,65 +4077,60 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, if (Info.ParseError) return true; - if (Info.Opcode != ~0U) { - const MCInstrDesc &Desc = MII->get(Info.Opcode); + if (Info.Opcode == ~0U) + continue; - // Build the list of clobbers, outputs and inputs. - for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) { - MCParsedAsmOperand *Operand = Info.ParsedOperands[i]; + const MCInstrDesc &Desc = MII->get(Info.Opcode); - // Immediate. - if (Operand->isImm()) { - if (Operand->needAsmRewrite()) - AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix, - Operand->getStartLoc())); - continue; - } + // Build the list of clobbers, outputs and inputs. 
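+    // Each parsed operand falls into one of three classes: an immediate
+    // (which may need an asm-string rewrite), a register def (a clobber), or
+    // an identifier that becomes an input or output of the inline asm.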
+ for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) { + MCParsedAsmOperand *Operand = Info.ParsedOperands[i]; - // Register operand. - if (Operand->isReg() && !Operand->needAddressOf()) { - unsigned NumDefs = Desc.getNumDefs(); - // Clobber. - if (NumDefs && Operand->getMCOperandNum() < NumDefs) { - std::string Reg; - raw_string_ostream OS(Reg); - IP->printRegName(OS, Operand->getReg()); - ClobberRegs.insert(StringRef(OS.str())); - } - continue; - } + // Immediate. + if (Operand->isImm()) { + if (Operand->needAsmRewrite()) + AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix, + Operand->getStartLoc())); + continue; + } - // Expr/Input or Output. - bool IsVarDecl; - unsigned Length, Size, Type; - void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, - Length, Size, Type, IsVarDecl); - if (OpDecl) { - bool isOutput = (i == 1) && Desc.mayStore(); - if (Operand->isMem() && Operand->needSizeDirective()) - AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, - Operand->getStartLoc(), - /*Len*/0, - Operand->getMemSize())); - if (isOutput) { - std::string Constraint = "="; - ++InputIdx; - OutputDecls.push_back(OpDecl); - OutputDeclsAddressOf.push_back(Operand->needAddressOf()); - Constraint += Operand->getConstraint().str(); - OutputConstraints.push_back(Constraint); - AsmStrRewrites.push_back(AsmRewrite(AOK_Output, - Operand->getStartLoc(), - Operand->getNameLen())); - } else { - InputDecls.push_back(OpDecl); - InputDeclsAddressOf.push_back(Operand->needAddressOf()); - InputConstraints.push_back(Operand->getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Input, - Operand->getStartLoc(), - Operand->getNameLen())); - } - } + // Register operand. + if (Operand->isReg() && !Operand->needAddressOf()) { + unsigned NumDefs = Desc.getNumDefs(); + // Clobber. + if (NumDefs && Operand->getMCOperandNum() < NumDefs) + ClobberRegs.push_back(Operand->getReg()); + continue; + } + + // Expr/Input or Output. + bool IsVarDecl; + unsigned Length, Size, Type; + void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, + Length, Size, Type, + IsVarDecl); + if (!OpDecl) + continue; + + bool isOutput = (i == 1) && Desc.mayStore(); + if (Operand->isMem() && Operand->needSizeDirective()) + AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, + Operand->getStartLoc(), /*Len*/0, + Operand->getMemSize())); + + if (isOutput) { + ++InputIdx; + OutputDecls.push_back(OpDecl); + OutputDeclsAddressOf.push_back(Operand->needAddressOf()); + OutputConstraints.push_back('=' + Operand->getConstraint().str()); + AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(), + Operand->getNameLen())); + } else { + InputDecls.push_back(OpDecl); + InputDeclsAddressOf.push_back(Operand->needAddressOf()); + InputConstraints.push_back(Operand->getConstraint().str()); + AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(), + Operand->getNameLen())); } } } @@ -4142,9 +4140,14 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, NumInputs = InputDecls.size(); // Set the unique clobbers. 
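  // A sorted SmallVector plus std::unique replaces the old std::set: the
  // clobber list stays deterministic, heap allocation is avoided, and
  // register names need to be printed only once, after uniquing.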
- for (std::set::iterator I = ClobberRegs.begin(), - E = ClobberRegs.end(); I != E; ++I) - Clobbers.push_back(*I); + array_pod_sort(ClobberRegs.begin(), ClobberRegs.end()); + ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()), + ClobberRegs.end()); + Clobbers.assign(ClobberRegs.size(), std::string()); + for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) { + raw_string_ostream OS(Clobbers[I]); + IP->printRegName(OS, ClobberRegs[I]); + } // Merge the various outputs and inputs. Output are expected first. if (NumOutputs || NumInputs) { @@ -4166,9 +4169,10 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, AsmRewriteKind PrevKind = AOK_Imm; raw_string_ostream OS(AsmStringIR); const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart(); - array_pod_sort (AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort); - for (SmallVectorImpl::iterator - I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { + array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort); + for (SmallVectorImpl::iterator I = AsmStrRewrites.begin(), + E = AsmStrRewrites.end(); + I != E; ++I) { const char *Loc = (*I).Loc.getPointer(); unsigned AdditionalSkip = 0; @@ -4190,22 +4194,19 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, switch (Kind) { default: break; case AOK_Imm: - OS << Twine("$$"); - OS << (*I).Val; + OS << "$$" << (*I).Val; break; case AOK_ImmPrefix: - OS << Twine("$$"); + OS << "$$"; break; case AOK_Input: - OS << '$'; - OS << InputIdx++; + OS << '$' << InputIdx++; break; case AOK_Output: - OS << '$'; - OS << OutputIdx++; + OS << '$' << OutputIdx++; break; case AOK_SizeDirective: - switch((*I).Val) { + switch ((*I).Val) { default: break; case 8: OS << "byte ptr "; break; case 16: OS << "word ptr "; break; @@ -4224,7 +4225,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, OS << ".align " << Val; // Skip the original immediate. - assert (Val < 10 && "Expected alignment less then 2^10."); + assert(Val < 10 && "Expected alignment less then 2^10."); AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4; break; } -- cgit v1.1 From 79cd4118090a3c0bc80cafc699a51abf1d6299f3 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 15 Feb 2013 21:05:58 +0000 Subject: Remove a final dependency on the form field in tablegen; which is a remnant of the old jit and which we don't intend to support in mips16 or micromips. This dependency is for the testing of whether an instruction is a pseudo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175297 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 6 ------ lib/Target/Mips/MipsAsmPrinter.cpp | 10 ++++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 4b68b7e..9f2d1e4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -141,12 +141,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, llvm_unreachable("unimplemented opcode in EncodeInstruction()"); const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); - uint64_t TSFlags = Desc.TSFlags; - - // Pseudo instructions don't get encoded and shouldn't be here - // in the first place! 
- if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo) - llvm_unreachable("Pseudo opcode found in EncodeInstruction()"); // Get byte count of instruction unsigned Size = Desc.getSize(); diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 84bf48c..003d890 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -74,6 +74,16 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(OutStreamer, &*I)) continue; + // The inMips16Mode() test is not permanent. + // Some instructions are marked as pseudo right now which + // would make the test fail for the wrong reason but + // that will be fixed soon. We need this here because we are + // removing another test for this situation downstream in the + // callchain. + // + if (I->isPseudo() && !Subtarget->inMips16Mode()) + llvm_unreachable("Pseudo opcode found in EmitInstruction()"); + MCInst TmpInst0; MCInstLowering.Lower(I, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); -- cgit v1.1 From dc2f79274021a590d6b72acd741117068c3e49bd Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 15 Feb 2013 21:20:45 +0000 Subject: [mips] Split SelectAddr, which was used to match address patterns, into two functions. Set AddedComplexity to determine the order in which patterns are matched. This simplifies selection of floating point loads/stores. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175300 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelDAGToDAG.cpp | 39 ++++++++++++++++++++++----------- lib/Target/Mips/MipsInstrFPU.td | 42 ++++++++++++++++++++++++++++++++---- lib/Target/Mips/MipsInstrInfo.td | 8 ++++++- 3 files changed, 71 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c5f1290..385ade5 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -96,7 +96,17 @@ private: SDNode *Select(SDNode *N); // Complex Pattern. - bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset); + /// (reg + imm). + bool selectAddrRegImm(SDNode *Parent, SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Fall back on this function if all else fails. + bool selectAddrDefault(SDNode *Parent, SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match integer address pattern. + bool selectIntAddr(SDNode *Parent, SDValue Addr, SDValue &Base, + SDValue &Offset) const; bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias); @@ -323,8 +333,8 @@ SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions -bool MipsDAGToDAGISel:: -SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { +bool MipsDAGToDAGISel::selectAddrRegImm(SDNode *Parent, SDValue Addr, + SDValue &Base, SDValue &Offset) const { EVT ValTy = Addr.getValueType(); // if Address is FI, get the TargetFrameIndex. @@ -384,21 +394,24 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } } - - // If an indexed floating point load/store can be emitted, return false. 
- const LSBaseSDNode *LS = dyn_cast(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); + return false; +} + +bool MipsDAGToDAGISel::selectAddrDefault(SDNode *Parent, SDValue Addr, + SDValue &Base, SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); return true; } +bool MipsDAGToDAGISel::selectIntAddr(SDNode *Parent, SDValue Addr, + SDValue &Base, SDValue &Offset) const { + return selectAddrRegImm(Parent, Addr, Base, Offset) || + selectAddrDefault(Parent, Addr, Base, Offset); +} + void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); if (Parent) { diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 70465d8..891bdc1 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -152,14 +152,14 @@ class MTC1_FT_CCR : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addr:$addr))], Itin, FrmFI> { + [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; } class SW_FT : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, addr:$addr)], Itin, FrmFI> { + [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; } @@ -180,13 +180,17 @@ class LWXC1_FT : InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index), !strconcat(opstr, "\t$fd, ${index}(${base})"), - [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI>; + [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> { + let AddedComplexity = 20; +} class SWXC1_FT : InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index), !strconcat(opstr, "\t$fs, ${index}(${base})"), - [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI>; + [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> { + let AddedComplexity = 20; +} class BC1F_FT : @@ -498,3 +502,33 @@ let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } + +// Load/Store patterns. 
+let AddedComplexity = 40 in { + let Predicates = [IsN64, HasStdEnc] in { + def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>; + def : MipsPat<(store FGR32:$v, addrRegImm:$a), + (SWC1_P8 FGR32:$v, addrRegImm:$a)>; + def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>; + def : MipsPat<(store FGR64:$v, addrRegImm:$a), + (SDC164_P8 FGR64:$v, addrRegImm:$a)>; + } + + let Predicates = [NotN64, HasStdEnc] in { + def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>; + def : MipsPat<(store FGR32:$v, addrRegImm:$a), + (SWC1 FGR32:$v, addrRegImm:$a)>; + } + + let Predicates = [NotN64, HasMips64, HasStdEnc] in { + def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>; + def : MipsPat<(store FGR64:$v, addrRegImm:$a), + (SDC164 FGR64:$v, addrRegImm:$a)>; + } + + let Predicates = [NotN64, NotMips64, HasStdEnc] in { + def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>; + def : MipsPat<(store AFGR64:$v, addrRegImm:$a), + (SDC1 AFGR64:$v, addrRegImm:$a)>; + } +} diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 052e855..f37f935 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -334,7 +334,13 @@ def immZExt5 : ImmLeaf; // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. def addr : - ComplexPattern; + ComplexPattern; + +def addrRegImm : + ComplexPattern; + +def addrDefault : + ComplexPattern; //===----------------------------------------------------------------------===// // Instructions specific format -- cgit v1.1 From ffd28a44f04ab2de5a7092fbd5ff17af79f56e28 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 15 Feb 2013 21:45:11 +0000 Subject: [mips] Clean up class MipsCCInfo. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175310 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 84 ++++++++++++++++++++---------------- lib/Target/Mips/MipsISelLowering.h | 48 +++++++++++---------- 2 files changed, 74 insertions(+), 58 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 2ff369c..088e669 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3007,9 +3007,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); + MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); - MipsCCInfo.analyzeCallOperands(Outs); + MipsCCInfo.analyzeCallOperands(Outs, isVarArg); // Get a count of how many bytes are to be pushed on the stack. 
unsigned NextStackOffset = CCInfo.getNextStackOffset(); @@ -3294,7 +3294,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); + MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); MipsCCInfo.analyzeFormalArguments(Ins); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), @@ -3776,40 +3776,21 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } -MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg, - bool IsO32, CCState &Info) : CCInfo(Info) { - UseRegsForByval = true; - - if (IsO32) { - RegSize = 4; - NumIntArgRegs = array_lengthof(O32IntRegs); - ReservedArgArea = 16; - IntArgRegs = ShadowRegs = O32IntRegs; - FixedFn = VarFn = CC_MipsO32; - } else { - RegSize = 8; - NumIntArgRegs = array_lengthof(Mips64IntRegs); - ReservedArgArea = 0; - IntArgRegs = Mips64IntRegs; - ShadowRegs = Mips64DPRegs; - FixedFn = CC_MipsN; - VarFn = IsVarArg ? CC_MipsN_VarArg : CC_MipsN; - } - - if (CallConv == CallingConv::Fast) { - assert(!IsVarArg); - UseRegsForByval = false; - ReservedArgArea = 0; - FixedFn = VarFn = CC_Mips_FastCC; - } - +MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_, + CCState &Info) + : CCInfo(Info), CallConv(CC), IsO32(IsO32_) { // Pre-allocate reserved argument area. - CCInfo.AllocateStack(ReservedArgArea, 1); + CCInfo.AllocateStack(reservedArgArea(), 1); } void MipsTargetLowering::MipsCC:: -analyzeCallOperands(const SmallVectorImpl &Args) { +analyzeCallOperands(const SmallVectorImpl &Args, + bool IsVarArg) { + assert((CallConv != CallingConv::Fast || !IsVarArg) && + "CallingConv::Fast shouldn't be used for vararg functions."); + unsigned NumOpnds = Args.size(); + llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn(); for (unsigned I = 0; I != NumOpnds; ++I) { MVT ArgVT = Args[I].VT; @@ -3821,10 +3802,10 @@ analyzeCallOperands(const SmallVectorImpl &Args) { continue; } - if (Args[I].IsFixed) - R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - else + if (IsVarArg && !Args[I].IsFixed) R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else + R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); if (R) { #ifndef NDEBUG @@ -3839,6 +3820,7 @@ analyzeCallOperands(const SmallVectorImpl &Args) { void MipsTargetLowering::MipsCC:: analyzeFormalArguments(const SmallVectorImpl &Args) { unsigned NumArgs = Args.size(); + llvm::CCAssignFn *FixedFn = fixedArgFn(); for (unsigned I = 0; I != NumArgs; ++I) { MVT ArgVT = Args[I].VT; @@ -3868,11 +3850,12 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0."); struct ByValArgInfo ByVal; + unsigned RegSize = regSize(); unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize); unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize), RegSize * 2); - if (UseRegsForByval) + if (useRegsForByval()) allocateRegs(ByVal, ByValSize, Align); // Allocate space on caller's stack. @@ -3883,9 +3866,38 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, ByValArgs.push_back(ByVal); } +unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const { + return IsO32 ? 
array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs); +} + +unsigned MipsTargetLowering::MipsCC::reservedArgArea() const { + return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0; +} + +const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const { + return IsO32 ? O32IntRegs : Mips64IntRegs; +} + +llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const { + if (CallConv == CallingConv::Fast) + return CC_Mips_FastCC; + + return IsO32 ? CC_MipsO32 : CC_MipsN; +} + +llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const { + return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg; +} + +const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const { + return IsO32 ? O32IntRegs : Mips64DPRegs; +} + void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, unsigned Align) { + unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs(); + const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs(); assert(!(ByValSize % RegSize) && !(Align % RegSize) && "Byval argument's size and alignment should be a multiple of" "RegSize."); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 00aa282..3b46355 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -201,53 +201,57 @@ namespace llvm { /// arguments and inquire about calling convention information. class MipsCC { public: - MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32, - CCState &Info); + MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info); - void analyzeCallOperands(const SmallVectorImpl &Outs); + void analyzeCallOperands(const SmallVectorImpl &Outs, + bool IsVarArg); void analyzeFormalArguments(const SmallVectorImpl &Ins); - void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags); - const CCState &getCCInfo() const { return CCInfo; } /// hasByValArg - Returns true if function has byval arguments. bool hasByValArg() const { return !ByValArgs.empty(); } - /// useRegsForByval - Returns true if the calling convention allows the - /// use of registers to pass byval arguments. - bool useRegsForByval() const { return UseRegsForByval; } - /// regSize - Size (in number of bits) of integer registers. - unsigned regSize() const { return RegSize; } + unsigned regSize() const { return IsO32 ? 4 : 8; } /// numIntArgRegs - Number of integer registers available for calls. - unsigned numIntArgRegs() const { return NumIntArgRegs; } + unsigned numIntArgRegs() const; /// reservedArgArea - The size of the area the caller reserves for /// register arguments. This is 16-byte if ABI is O32. - unsigned reservedArgArea() const { return ReservedArgArea; } + unsigned reservedArgArea() const; - /// intArgRegs - Pointer to array of integer registers. - const uint16_t *intArgRegs() const { return IntArgRegs; } + /// Return pointer to array of integer argument registers. + const uint16_t *intArgRegs() const; typedef SmallVector::const_iterator byval_iterator; byval_iterator byval_begin() const { return ByValArgs.begin(); } byval_iterator byval_end() const { return ByValArgs.end(); } private: + void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags); + + /// useRegsForByval - Returns true if the calling convention allows the + /// use of registers to pass byval arguments. 
+ bool useRegsForByval() const { return CallConv != CallingConv::Fast; } + + /// Return the function that analyzes fixed argument list functions. + llvm::CCAssignFn *fixedArgFn() const; + + /// Return the function that analyzes variable argument list functions. + llvm::CCAssignFn *varArgFn() const; + + const uint16_t *shadowRegs() const; + void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, unsigned Align); CCState &CCInfo; - bool UseRegsForByval; - unsigned RegSize; - unsigned NumIntArgRegs; - unsigned ReservedArgArea; - const uint16_t *IntArgRegs, *ShadowRegs; + CallingConv::ID CallConv; + bool IsO32; SmallVector ByValArgs; - llvm::CCAssignFn *FixedFn, *VarFn; }; // Subtarget Info -- cgit v1.1 From 4fb25b7d799ea27a2c98a4d9bcf7469cc685db47 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 15 Feb 2013 21:58:13 +0000 Subject: [ms-inline asm] Adjust the EndLoc to account for the ']'. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175312 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index dc15a11..8c4c447 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -911,7 +911,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); - End = Parser.getTok().getEndLoc(); + // Adjust the EndLoc due to the ']'. + End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); return X86Operand::CreateMem(Disp, Start, End, Size); } -- cgit v1.1 From 789cb5df9ca61f8a3794a4fbde7cc020fd00a02a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 22:31:27 +0000 Subject: Use the 'target-features' and 'target-cpu' attributes to reset the subtarget features. If two functions require different features (e.g., `-mno-sse' vs. `-msse') then we want to honor that, especially during LTO. We can do that by resetting the subtarget's features depending upon the 'target-feature' attribute. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175314 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 4 ++ lib/Target/X86/X86Subtarget.cpp | 100 +++++++++++++++----------- lib/Target/X86/X86Subtarget.h | 8 ++- 3 files changed, 71 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c9289ad..fa913f6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -354,6 +354,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TTI = getAnalysisIfAvailable(); GFI = Fn.hasGC() ? 
&getAnalysis().getFunctionInfo(Fn) : 0; + TargetSubtargetInfo &ST = + const_cast(TM.getSubtarget()); + ST.resetSubtargetFeatures(MF); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast(Fn), this); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b8ecf44..58e0d06 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -14,6 +14,8 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" #include "X86InstrInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -324,46 +326,21 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, - unsigned StackAlignOverride, bool is64Bit) - : X86GenSubtargetInfo(TT, CPU, FS) - , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) - , TargetTriple(TT) - , In64BitMode(is64Bit) { - // Determine default and user specified characteristics +void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? 
FSAttr.getValueAsString() : ""; + if (!FS.empty()) + resetSubtargetFeatures(CPU, FS); +} + +void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (!FS.empty() || !CPU.empty()) { if (CPUName.empty()) { @@ -440,6 +417,49 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, stackAlignment = 16; } +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, + unsigned StackAlignOverride, bool is64Bit) + : X86GenSubtargetInfo(TT, CPU, FS) + , X86ProcFamily(Others) + , PICStyle(PICStyles::None) + , X86SSELevel(NoMMXSSE) + , X863DNowLevel(NoThreeDNow) + , HasCMov(false) + , HasX86_64(false) + , HasPOPCNT(false) + , HasSSE4A(false) + , HasAES(false) + , HasPCLMUL(false) + , HasFMA(false) + , HasFMA4(false) + , HasXOP(false) + , HasMOVBE(false) + , HasRDRAND(false) + , HasF16C(false) + , HasFSGSBase(false) + , HasLZCNT(false) + , HasBMI(false) + , HasBMI2(false) + , HasRTM(false) + , HasADX(false) + , IsBTMemSlow(false) + , IsUAMemFast(false) + , HasVectorUAMem(false) + , HasCmpxchg16b(false) + , UseLeaForSP(false) + , HasSlowDivide(false) + , PostRAScheduler(false) + , PadShortFunctions(false) + , stackAlignment(4) + // FIXME: this is a known good value for Yonah. How about others? + , MaxInlineSizeThreshold(128) + , TargetTriple(TT) + , StackAlignOverride(StackAlignOverride) + , In64BitMode(is64Bit) { + resetSubtargetFeatures(CPU, FS); +} + bool X86Subtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 1466041..d1c7067 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -168,11 +168,13 @@ protected: InstrItineraryData InstrItins; private: + /// StackAlignOverride - Override the stack alignment. + unsigned StackAlignOverride; + /// In64BitMode - True if compiling for 64-bit, false for 32-bit. bool In64BitMode; public: - /// This constructor initializes the data members to match that /// of the specified triple. /// @@ -197,6 +199,10 @@ public: /// instruction. void AutoDetectSubtargetFeatures(); + /// \brief Reset the features for the X86 target. + virtual void resetSubtargetFeatures(const MachineFunction *MF); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); + /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.1 From 4788d14b484ba9e2fe19855fd6c97a3659980fca Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 22:41:25 +0000 Subject: Support changing the subtarget features in ARM. 
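The idea behind these two subtarget changes can be sketched standalone. The following toy C++ model is not part of the patches and uses hypothetical types and feature names rather than the real TargetSubtargetInfo hierarchy; it re-derives feature bits per function from a per-function feature string, the way the patches re-derive them from the "target-cpu" and "target-features" attributes:

#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Toy subtarget: a feature string such as "+sse,-neon" toggles the bits it
// names, roughly how a generated ParseSubtargetFeatures consumes FS.
struct ToySubtarget {
  bool HasSSE = false;
  bool HasNEON = false;
  void resetSubtargetFeatures(const std::string &FS) {
    HasSSE = HasNEON = false; // back to defaults before re-parsing
    std::istringstream SS(FS);
    for (std::string Tok; std::getline(SS, Tok, ',');) {
      if (Tok.size() < 2)
        continue;
      bool Enable = Tok[0] == '+';
      std::string Name = Tok.substr(1);
      if (Name == "sse")
        HasSSE = Enable;
      else if (Name == "neon")
        HasNEON = Enable;
    }
  }
};

int main() {
  // Hypothetical per-function feature strings, e.g. after LTO merges objects
  // built with -msse and -mno-sse; each function gets the bits it asked for.
  std::map<std::string, std::string> Fns = {{"fast", "+sse"}, {"plain", "-sse"}};
  ToySubtarget ST;
  for (const auto &Fn : Fns) {
    ST.resetSubtargetFeatures(Fn.second); // reconfigure before selecting Fn
    std::cout << Fn.first << ": sse=" << ST.HasSSE << "\n";
  }
  return 0;
}

The point the patches make is visible in the loop: two functions in one module can legitimately demand different feature sets, so the subtarget is reconfigured per function instead of once per module.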
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175315 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 29 +++++++++++++++++++++++++---- lib/Target/ARM/ARMSubtarget.h | 4 ++++ 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c3dea00..c33bb9d 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -14,7 +14,9 @@ #include "ARMSubtarget.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -85,17 +87,35 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { - // Determine default and user specified characteristics + resetSubtargetFeatures(CPU, FS); +} + +void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; + if (!FS.empty()) + resetSubtargetFeatures(CPU, FS); +} + +void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) CPUString = "generic"; // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version. - std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString); + std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(), + CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS; + ArchFS = ArchFS + "," + FS.str(); else ArchFS = FS; } @@ -112,7 +132,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass())) + if ((TargetTriple.getTriple().find("eabi") != std::string::npos) || + (isTargetIOS() && isMClass())) // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. // Darwin-EABI conforms to AACPS but not the rest of EABI. TargetABI = ARM_ABI_AAPCS; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 33efabf..87834b8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -202,6 +202,10 @@ protected: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + /// \brief Reset the features for the X86 target. 
+ virtual void resetSubtargetFeatures(const MachineFunction *MF); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); + void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } -- cgit v1.1 From 67144e37ba5cd35ee917daac631e03963b05a674 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Fri, 15 Feb 2013 22:50:52 +0000 Subject: If bundle alignment is enabled, do not add data to a fragment with instructions With bundle alignment, instructions all get their own MCFragments (unless they are in a bundle-locked group). For instructions with fixups, this is an MCDataFragment. Emitting actual data (e.g. for .long) attempts to re-use MCDataFragments, which we don't want in this case since it leads to fragments which exceed the bundle size. So, don't reuse them in this case. Also adds a test and fixes some formatting. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175316 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCELFStreamer.cpp | 7 ++++--- lib/MC/MCObjectStreamer.cpp | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index c4c8e6e..8ddbfbb 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -386,7 +386,9 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { if (Assembler.isBundlingEnabled()) { MCSectionData *SD = getCurrentSectionData(); if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) - DF = getOrCreateDataFragment(); + // If we are bundle-locked, we re-use the current fragment. + // The bundle-locking directive ensures this is a new data fragment. + DF = cast(getCurrentFragment()); else if (!SD->isBundleLocked() && Fixups.size() == 0) { // Optimize memory usage by emitting the instruction to a // MCCompactEncodedInstFragment when not in a bundle-locked group and @@ -394,8 +396,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD); CEIF->getContents().append(Code.begin(), Code.end()); return; - } - else { + } else { DF = new MCDataFragment(SD); if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) { // If this is a new fragment created for a bundle-locked group, and the diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index fe43506..b6c7341 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -59,7 +59,9 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const { MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null(getCurrentFragment()); - if (!F) + // When bundling is enabled, we don't want to add data to a fragment that + // already has instructions (see MCELFStreamer::EmitInstToData for details) + if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) F = new MCDataFragment(getCurrentSectionData()); return F; } -- cgit v1.1 From b54562b96d407d007e9e6da3ddef09ac83e9776f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 15 Feb 2013 22:54:16 +0000 Subject: [ms-inline asm] It's possible to have a SizeDirective rewrite and an Input/Output rewrite to the same location. Make sure the SizeDirective rewrite is performed first. This also ensures the sort algorithm is stable.
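The ordering requirement can be restated with a small standalone comparator; this is a toy sketch, not the AsmParser code. Rewrites sort by source location, and at a shared location a SizeDirective must come first so that, for example, "dword ptr" is emitted before the "$1" that replaces the operand:

#include <algorithm>
#include <cassert>
#include <vector>

enum RewriteKind { AOK_SizeDirective, AOK_Input, AOK_Output };

struct Rewrite {
  const char *Loc;  // position in the original asm string
  RewriteKind Kind;
};

// Primary key: location. Tie-break: a SizeDirective sorts (and is emitted)
// before an Input/Output rewrite at the same location.
static bool rewriteLess(const Rewrite &A, const Rewrite &B) {
  if (A.Loc != B.Loc)
    return A.Loc < B.Loc;
  return A.Kind == AOK_SizeDirective && B.Kind != AOK_SizeDirective;
}

int main() {
  const char *Asm = "mov eax, Var";
  std::vector<Rewrite> Rewrites = {{Asm + 9, AOK_Input},
                                   {Asm + 9, AOK_SizeDirective}};
  std::stable_sort(Rewrites.begin(), Rewrites.end(), rewriteLess);
  assert(Rewrites[0].Kind == AOK_SizeDirective &&
         "size directive must be rewritten first");
  return 0;
}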
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175317 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index bfe3612..b7953c1 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -4041,7 +4041,21 @@ static int RewritesSort(const void *A, const void *B) { return -1; if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) return 1; - return 0; + + // It's possible to have a SizeDirective rewrite and an Input/Output rewrite + // to the same location. Make sure the SizeDirective rewrite is performed + // first. This also ensure the sort algorithm is stable. + if (AsmRewriteA->Kind == AOK_SizeDirective) { + assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) && + "Expected an Input/Output rewrite!"); + return -1; + } + if (AsmRewriteB->Kind == AOK_SizeDirective) { + assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) && + "Expected an Input/Output rewrite!"); + return 1; + } + llvm_unreachable ("Unstable rewrite sort."); } bool @@ -4174,6 +4188,7 @@ AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, E = AsmStrRewrites.end(); I != E; ++I) { const char *Loc = (*I).Loc.getPointer(); + assert(Loc >= Start && "Expected Loc to be after Start!"); unsigned AdditionalSkip = 0; AsmRewriteKind Kind = (*I).Kind; -- cgit v1.1 From f78708593407286de34506e699da25a56b65a20d Mon Sep 17 00:00:00 2001 From: Matt Beaumont-Gay Date: Fri, 15 Feb 2013 23:12:33 +0000 Subject: Suppress a GCC -Wunused-variable warning in -Asserts builds git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCObjectStreamer.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index b6c7341..0d2ce83 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -226,8 +226,10 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { IF->getContents().append(Code.begin(), Code.end()); } +#ifndef NDEBUG static const char *BundlingNotImplementedMsg = "Aligned bundling is not implemented for this object format"; +#endif void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) { llvm_unreachable(BundlingNotImplementedMsg); -- cgit v1.1 From 9be8b4fc92e1ace819a78db512c1f945c1471be7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 23:18:01 +0000 Subject: Reinitialize the ivars in the subtarget. When we're recalculating the feature set of the subtarget, we need to have the ivars in their initial state. 
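A minimal standalone sketch of the failure mode being fixed, with toy fields rather than the real subtarget classes: a feature string only toggles the bits it names, so without first returning every ivar to its default, bits set while configuring the previous function would leak into the next:

#include <cassert>
#include <sstream>
#include <string>

struct ToySubtarget {
  bool HasNEON = false;
  void initializeEnvironment() { HasNEON = false; } // restore every default
  void parseFeatures(const std::string &FS) {       // touches only named bits
    std::istringstream SS(FS);
    for (std::string Tok; std::getline(SS, Tok, ',');) {
      if (Tok == "+neon")
        HasNEON = true;
      else if (Tok == "-neon")
        HasNEON = false;
    }
  }
  void resetSubtargetFeatures(const std::string &FS) {
    initializeEnvironment(); // the call this commit adds before re-parsing
    parseFeatures(FS);
  }
};

int main() {
  ToySubtarget ST;
  ST.resetSubtargetFeatures("+neon"); // function A enables NEON
  ST.resetSubtargetFeatures("+vfp3"); // function B never mentions NEON
  assert(!ST.HasNEON && "bit from function A must not leak into function B");
  return 0;
}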
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175320 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 84 ++++++++++++++++++++++------------------- lib/Target/ARM/ARMSubtarget.h | 4 +- lib/Target/X86/X86Subtarget.cpp | 72 +++++++++++++++++++---------------- lib/Target/X86/X86Subtarget.h | 4 +- 4 files changed, 90 insertions(+), 74 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c33bb9d..e11314d 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -45,51 +45,55 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) - , HasV4TOps(false) - , HasV5TOps(false) - , HasV5TEOps(false) - , HasV6Ops(false) - , HasV6T2Ops(false) - , HasV7Ops(false) - , HasVFPv2(false) - , HasVFPv3(false) - , HasVFPv4(false) - , HasNEON(false) - , UseNEONForSinglePrecisionFP(false) - , UseMulOps(UseFusedMulOps) - , SlowFPVMLx(false) - , HasVMLxForwarding(false) - , SlowFPBrcc(false) - , InThumbMode(false) - , HasThumb2(false) - , IsMClass(false) - , NoARM(false) - , PostRAScheduler(false) - , IsR9Reserved(ReserveR9) - , UseMovt(false) - , SupportsTailCall(false) - , HasFP16(false) - , HasD16(false) - , HasHardwareDivide(false) - , HasHardwareDivideInARM(false) - , HasT2ExtractPack(false) - , HasDataBarrier(false) - , Pref32BitThumb(false) - , AvoidCPSRPartialUpdate(false) - , AvoidMOVsShifterOperand(false) - , HasRAS(false) - , HasMPExtension(false) - , FPOnlySP(false) - , AllowsUnalignedMem(false) - , Thumb2DSP(false) - , UseNaClTrap(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } +void ARMSubtarget::initializeEnvironment() { + HasV4TOps = false; + HasV5TOps = false; + HasV5TEOps = false; + HasV6Ops = false; + HasV6T2Ops = false; + HasV7Ops = false; + HasVFPv2 = false; + HasVFPv3 = false; + HasVFPv4 = false; + HasNEON = false; + UseNEONForSinglePrecisionFP = false; + UseMulOps = UseFusedMulOps; + SlowFPVMLx = false; + HasVMLxForwarding = false; + SlowFPBrcc = false; + InThumbMode = false; + HasThumb2 = false; + IsMClass = false; + NoARM = false; + PostRAScheduler = false; + IsR9Reserved = ReserveR9; + UseMovt = false; + SupportsTailCall = false; + HasFP16 = false; + HasD16 = false; + HasHardwareDivide = false; + HasHardwareDivideInARM = false; + HasT2ExtractPack = false; + HasDataBarrier = false; + Pref32BitThumb = false; + AvoidCPSRPartialUpdate = false; + AvoidMOVsShifterOperand = false; + HasRAS = false; + HasMPExtension = false; + FPOnlySP = false; + AllowsUnalignedMem = false; + Thumb2DSP = false; + UseNaClTrap = false; +} + void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet FnAttrs = MF->getFunction()->getAttributes(); Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, @@ -100,8 +104,10 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? 
FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 87834b8..f47555c 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -204,8 +204,10 @@ protected: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 58e0d06..6391acf 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } +void X86Subtarget::initializeEnvironment() { + PICStyle = PICStyles::None; + X86SSELevel = NoMMXSSE; + X863DNowLevel = NoThreeDNow; + HasCMov = false; + HasX86_64 = false; + HasPOPCNT = false; + HasSSE4A = false; + HasAES = false; + HasPCLMUL = false; + HasFMA = false; + HasFMA4 = false; + HasXOP = false; + HasMOVBE = false; + HasRDRAND = false; + HasF16C = false; + HasFSGSBase = false; + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; + HasRTM = false; + HasADX = false; + IsBTMemSlow = false; + IsUAMemFast = false; + HasVectorUAMem = false; + HasCmpxchg16b = false; + UseLeaForSP = false; + HasSlowDivide = false; + PostRAScheduler = false; + PadShortFunctions = false; + stackAlignment = 4; + // FIXME: this is a known good value for Yonah. How about others? + MaxInlineSizeThreshold = 128; +} + X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? 
- , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d1c7067..e97da4b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,8 +201,10 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.1 From ba6867d0ce3de9b7b4385f98d215edfcd36c4b32 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Feb 2013 23:22:32 +0000 Subject: Temporary revert of 175320. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175322 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 84 +++++++++++++++++++---------------------- lib/Target/ARM/ARMSubtarget.h | 4 +- lib/Target/X86/X86Subtarget.cpp | 72 ++++++++++++++++------------------- lib/Target/X86/X86Subtarget.h | 4 +- 4 files changed, 74 insertions(+), 90 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e11314d..c33bb9d 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -45,55 +45,51 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) + , HasV4TOps(false) + , HasV5TOps(false) + , HasV5TEOps(false) + , HasV6Ops(false) + , HasV6T2Ops(false) + , HasV7Ops(false) + , HasVFPv2(false) + , HasVFPv3(false) + , HasVFPv4(false) + , HasNEON(false) + , UseNEONForSinglePrecisionFP(false) + , UseMulOps(UseFusedMulOps) + , SlowFPVMLx(false) + , HasVMLxForwarding(false) + , SlowFPBrcc(false) + , InThumbMode(false) + , HasThumb2(false) + , IsMClass(false) + , NoARM(false) + , PostRAScheduler(false) + , IsR9Reserved(ReserveR9) + , UseMovt(false) + , SupportsTailCall(false) + , HasFP16(false) + , HasD16(false) + , HasHardwareDivide(false) + , HasHardwareDivideInARM(false) + , HasT2ExtractPack(false) + , HasDataBarrier(false) + , Pref32BitThumb(false) + , AvoidCPSRPartialUpdate(false) + , AvoidMOVsShifterOperand(false) + , HasRAS(false) + , HasMPExtension(false) + , FPOnlySP(false) + , AllowsUnalignedMem(false) + , Thumb2DSP(false) + , UseNaClTrap(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { - initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } -void ARMSubtarget::initializeEnvironment() { - HasV4TOps = false; - HasV5TOps = false; - HasV5TEOps = false; - HasV6Ops = false; - HasV6T2Ops = false; - HasV7Ops = false; - HasVFPv2 = false; - HasVFPv3 = false; - HasVFPv4 = false; - HasNEON = false; - UseNEONForSinglePrecisionFP = false; - UseMulOps = UseFusedMulOps; - SlowFPVMLx = false; - HasVMLxForwarding = false; - SlowFPBrcc = false; - InThumbMode = false; - HasThumb2 = false; - IsMClass = false; - NoARM = false; - PostRAScheduler = false; - IsR9Reserved = ReserveR9; - UseMovt = false; - SupportsTailCall = false; - HasFP16 = false; - HasD16 = false; - HasHardwareDivide = false; - HasHardwareDivideInARM = false; - HasT2ExtractPack = false; - HasDataBarrier = false; - Pref32BitThumb = false; - AvoidCPSRPartialUpdate = false; - AvoidMOVsShifterOperand = false; - 
HasRAS = false; - HasMPExtension = false; - FPOnlySP = false; - AllowsUnalignedMem = false; - Thumb2DSP = false; - UseNaClTrap = false; -} - void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet FnAttrs = MF->getFunction()->getAttributes(); Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, @@ -104,10 +100,8 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); + if (!FS.empty()) resetSubtargetFeatures(CPU, FS); - } } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f47555c..87834b8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -204,10 +204,8 @@ protected: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); -private: - void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); -public: + void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 6391acf..58e0d06 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,10 +336,8 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); + if (!FS.empty()) resetSubtargetFeatures(CPU, FS); - } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -419,50 +417,46 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } -void X86Subtarget::initializeEnvironment() { - PICStyle = PICStyles::None; - X86SSELevel = NoMMXSSE; - X863DNowLevel = NoThreeDNow; - HasCMov = false; - HasX86_64 = false; - HasPOPCNT = false; - HasSSE4A = false; - HasAES = false; - HasPCLMUL = false; - HasFMA = false; - HasFMA4 = false; - HasXOP = false; - HasMOVBE = false; - HasRDRAND = false; - HasF16C = false; - HasFSGSBase = false; - HasLZCNT = false; - HasBMI = false; - HasBMI2 = false; - HasRTM = false; - HasADX = false; - IsBTMemSlow = false; - IsUAMemFast = false; - HasVectorUAMem = false; - HasCmpxchg16b = false; - UseLeaForSP = false; - HasSlowDivide = false; - PostRAScheduler = false; - PadShortFunctions = false; - stackAlignment = 4; - // FIXME: this is a known good value for Yonah. How about others? 
- MaxInlineSizeThreshold = 128; -} - X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) + , PICStyle(PICStyles::None) + , X86SSELevel(NoMMXSSE) + , X863DNowLevel(NoThreeDNow) + , HasCMov(false) + , HasX86_64(false) + , HasPOPCNT(false) + , HasSSE4A(false) + , HasAES(false) + , HasPCLMUL(false) + , HasFMA(false) + , HasFMA4(false) + , HasXOP(false) + , HasMOVBE(false) + , HasRDRAND(false) + , HasF16C(false) + , HasFSGSBase(false) + , HasLZCNT(false) + , HasBMI(false) + , HasBMI2(false) + , HasRTM(false) + , HasADX(false) + , IsBTMemSlow(false) + , IsUAMemFast(false) + , HasVectorUAMem(false) + , HasCmpxchg16b(false) + , UseLeaForSP(false) + , HasSlowDivide(false) + , PostRAScheduler(false) + , PadShortFunctions(false) + , stackAlignment(4) + // FIXME: this is a known good value for Yonah. How about others? + , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { - initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e97da4b..d1c7067 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,10 +201,8 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); -private: - void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); -public: + /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.1 From abbf9df7f42e8e3e95b02b16ebbc6a0684bb4f6d Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 16 Feb 2013 00:14:37 +0000 Subject: [mips] Remove SDNPWantParent from the list of SDNodeProperties. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175325 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelDAGToDAG.cpp | 25 +++++++++++-------------- lib/Target/Mips/MipsInstrInfo.td | 6 +++--- 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 385ade5..78c74ef 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -97,16 +97,13 @@ private: // Complex Pattern. /// (reg + imm). - bool selectAddrRegImm(SDNode *Parent, SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; /// Fall back on this function if all else fails. - bool selectAddrDefault(SDNode *Parent, SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; /// Match integer address pattern. 
- bool selectIntAddr(SDNode *Parent, SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias); @@ -333,8 +330,8 @@ SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions -bool MipsDAGToDAGISel::selectAddrRegImm(SDNode *Parent, SDValue Addr, - SDValue &Base, SDValue &Offset) const { +bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { EVT ValTy = Addr.getValueType(); // if Address is FI, get the TargetFrameIndex. @@ -399,17 +396,17 @@ bool MipsDAGToDAGISel::selectAddrRegImm(SDNode *Parent, SDValue Addr, return false; } -bool MipsDAGToDAGISel::selectAddrDefault(SDNode *Parent, SDValue Addr, - SDValue &Base, SDValue &Offset) const { +bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { Base = Addr; Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); return true; } -bool MipsDAGToDAGISel::selectIntAddr(SDNode *Parent, SDValue Addr, - SDValue &Base, SDValue &Offset) const { - return selectAddrRegImm(Parent, Addr, Base, Offset) || - selectAddrDefault(Parent, Addr, Base, Offset); +bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); } void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f37f935..de09c9e 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -334,13 +334,13 @@ def immZExt5 : ImmLeaf; // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. def addr : - ComplexPattern; + ComplexPattern; def addrRegImm : - ComplexPattern; + ComplexPattern; def addrDefault : - ComplexPattern; + ComplexPattern; //===----------------------------------------------------------------------===// // Instructions specific format -- cgit v1.1 From 42edeb1ba8aabcbe0e5cc846d9e5f2a9e2261292 Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Sat, 16 Feb 2013 00:32:53 +0000 Subject: Derive ELF section type from the name in some cases where GNU as does so. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175327 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/ELFAsmParser.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index eb39415..7921abb 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -413,7 +413,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { unsigned Type = ELF::SHT_PROGBITS; - if (!TypeName.empty()) { + if (TypeName.empty()) { + if (SectionName.startswith(".note")) + Type = ELF::SHT_NOTE; + else if (SectionName == ".init_array") + Type = ELF::SHT_INIT_ARRAY; + else if (SectionName == ".fini_array") + Type = ELF::SHT_FINI_ARRAY; + else if (SectionName == ".preinit_array") + Type = ELF::SHT_PREINIT_ARRAY; + } else { if (TypeName == "init_array") Type = ELF::SHT_INIT_ARRAY; else if (TypeName == "fini_array") -- cgit v1.1 From b56606274d43c7a3e01b18a08d1115fbf2889996 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sat, 16 Feb 2013 01:25:28 +0000 Subject: [ms-inline asm] Do not omit the frame pointer if we have ms-inline assembly. If the frame pointer is omitted, and any stack changes occur in the inline assembly, e.g.: "pusha", then any C local variable or C argument references will be incorrect. I pass no judgement on anyone who would do such a thing. ;) rdar://13218191 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175334 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 31 +++++++++++++++------------ lib/Target/X86/X86FrameLowering.cpp | 2 +- 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index fa913f6..39d3a5d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -441,25 +441,28 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there are any calls in this machine function. + MF->setHasMSInlineAsm(false); MachineFrameInfo *MFI = MF->getFrameInfo(); - if (!MFI->hasCalls()) { - for (MachineFunction::const_iterator - I = MF->begin(), E = MF->end(); I != E; ++I) { - const MachineBasicBlock *MBB = I; - for (MachineBasicBlock::const_iterator - II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - - if ((MCID.isCall() && !MCID.isReturn()) || - II->isStackAligningInlineAsm()) { - MFI->setHasCalls(true); - goto done; - } + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + + if (MFI->hasCalls() && MF->hasMSInlineAsm()) + break; + + const MachineBasicBlock *MBB = I; + for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); + II != IE; ++II) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + if ((MCID.isCall() && !MCID.isReturn()) || + II->isStackAligningInlineAsm()) { + MFI->setHasCalls(true); + } + if (II->isMSInlineAsm()) { + MF->setHasMSInlineAsm(true); } } } - done: // Determine if there is a call to setjmp in the machine function. 
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 84b1c10..950fd39 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -50,7 +50,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || + MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() || MF.getInfo()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn()); } -- cgit v1.1 From 901d80065c9afa0ba33e8546c2e1e99a00aceb14 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 16 Feb 2013 01:36:26 +0000 Subject: Reinitialize the ivars in the subtarget so that they can be reset with the new features. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 84 ++++++++++++++++++++++------------------- lib/Target/ARM/ARMSubtarget.h | 4 +- lib/Target/X86/X86Subtarget.cpp | 70 ++++++++++++++++++---------------- lib/Target/X86/X86Subtarget.h | 4 +- 4 files changed, 89 insertions(+), 73 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c33bb9d..e11314d 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -45,51 +45,55 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) - , HasV4TOps(false) - , HasV5TOps(false) - , HasV5TEOps(false) - , HasV6Ops(false) - , HasV6T2Ops(false) - , HasV7Ops(false) - , HasVFPv2(false) - , HasVFPv3(false) - , HasVFPv4(false) - , HasNEON(false) - , UseNEONForSinglePrecisionFP(false) - , UseMulOps(UseFusedMulOps) - , SlowFPVMLx(false) - , HasVMLxForwarding(false) - , SlowFPBrcc(false) - , InThumbMode(false) - , HasThumb2(false) - , IsMClass(false) - , NoARM(false) - , PostRAScheduler(false) - , IsR9Reserved(ReserveR9) - , UseMovt(false) - , SupportsTailCall(false) - , HasFP16(false) - , HasD16(false) - , HasHardwareDivide(false) - , HasHardwareDivideInARM(false) - , HasT2ExtractPack(false) - , HasDataBarrier(false) - , Pref32BitThumb(false) - , AvoidCPSRPartialUpdate(false) - , AvoidMOVsShifterOperand(false) - , HasRAS(false) - , HasMPExtension(false) - , FPOnlySP(false) - , AllowsUnalignedMem(false) - , Thumb2DSP(false) - , UseNaClTrap(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } +void ARMSubtarget::initializeEnvironment() { + HasV4TOps = false; + HasV5TOps = false; + HasV5TEOps = false; + HasV6Ops = false; + HasV6T2Ops = false; + HasV7Ops = false; + HasVFPv2 = false; + HasVFPv3 = false; + HasVFPv4 = false; + HasNEON = false; + UseNEONForSinglePrecisionFP = false; + UseMulOps = UseFusedMulOps; + SlowFPVMLx = false; + HasVMLxForwarding = false; + SlowFPBrcc = false; + InThumbMode = false; + HasThumb2 = false; + IsMClass = false; + NoARM = false; + PostRAScheduler = false; + IsR9Reserved = ReserveR9; + UseMovt = false; + SupportsTailCall = false; + HasFP16 = false; + HasD16 = false; + HasHardwareDivide = false; + HasHardwareDivideInARM = false; + HasT2ExtractPack = false; + HasDataBarrier = false; + Pref32BitThumb = false; + AvoidCPSRPartialUpdate = false; + AvoidMOVsShifterOperand = false; + HasRAS = 
false; + HasMPExtension = false; + FPOnlySP = false; + AllowsUnalignedMem = false; + Thumb2DSP = false; + UseNaClTrap = false; +} + void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet FnAttrs = MF->getFunction()->getAttributes(); Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, @@ -100,8 +104,10 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 87834b8..f47555c 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -204,8 +204,10 @@ protected: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 58e0d06..0f2c008 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) + if (!FS.empty()) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + } } void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { stackAlignment = 16; } +void X86Subtarget::initializeEnvironment() { + X86SSELevel = NoMMXSSE; + X863DNowLevel = NoThreeDNow; + HasCMov = false; + HasX86_64 = false; + HasPOPCNT = false; + HasSSE4A = false; + HasAES = false; + HasPCLMUL = false; + HasFMA = false; + HasFMA4 = false; + HasXOP = false; + HasMOVBE = false; + HasRDRAND = false; + HasF16C = false; + HasFSGSBase = false; + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; + HasRTM = false; + HasADX = false; + IsBTMemSlow = false; + IsUAMemFast = false; + HasVectorUAMem = false; + HasCmpxchg16b = false; + UseLeaForSP = false; + HasSlowDivide = false; + PostRAScheduler = false; + PadShortFunctions = false; + stackAlignment = 4; + // FIXME: this is a known good value for Yonah. How about others? 
+ MaxInlineSizeThreshold = 128; +} + X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , HasADX(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) , TargetTriple(TT) , StackAlignOverride(StackAlignOverride) , In64BitMode(is64Bit) { + initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d1c7067..e97da4b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -201,8 +201,10 @@ public: /// \brief Reset the features for the X86 target. virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); - +public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; -- cgit v1.1 From 6a0da011e42e553d497fce2059f43401e854b99d Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 16 Feb 2013 09:47:57 +0000 Subject: Use a different scheme to choose 16/32 variants. This scheme is more consistent with how BuildMI works. No new tests needed. All should work the same as before. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 14 ++++++-------- lib/Target/Mips/Mips16InstrInfo.h | 4 +--- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 5a91519..19e00df 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -184,7 +184,7 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, int64_t Remainder = FrameSize - Base; BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base); if (isInt<16>(-Remainder)) - BuildAddiuSpImm(MBB, I, DL, -Remainder); + BuildMI(MBB, I, DL, AddiuSpImm(-Remainder)).addImm(-Remainder); else adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); } @@ -225,7 +225,7 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; if (isInt<16>(Remainder)) - BuildAddiuSpImm(MBB, I, DL, Remainder); + BuildMI(MBB, I, DL, AddiuSpImm(Remainder)).addImm(Remainder); else adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base); @@ -299,7 +299,7 @@ void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ?
I->getDebugLoc() : DebugLoc(); if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> - BuildAddiuSpImm(MBB, I, DL, Amount); + BuildMI(MBB, I, DL, AddiuSpImm(Amount)).addImm(Amount); else adjustStackPtrBigUnrestricted(SP, Amount, MBB, I); } @@ -400,13 +400,11 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); } -void Mips16InstrInfo::BuildAddiuSpImm( - MachineBasicBlock &MBB, - MachineBasicBlock::iterator II, DebugLoc DL, int64_t Imm) const { +const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { if (validSpImm8(Imm)) - BuildMI(MBB, II, DL, get(Mips::AddiuSpImm16)).addImm(Imm); + return get(Mips::AddiuSpImm16); else - BuildMI(MBB, II, DL, get(Mips::AddiuSpImmX16)).addImm(Imm); + return get(Mips::AddiuSpImmX16); } const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index f8570bd..0e8e87a 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -93,10 +93,8 @@ public: // // build the proper one based on the Imm field // - void BuildAddiuSpImm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II, DebugLoc DL, - int64_t Imm) const; + const MCInstrDesc& AddiuSpImm(int64_t Imm) const; private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; -- cgit v1.1 From f0e469bcaf65dddeadb5ccb400b4a712c5f763be Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:27:29 +0000 Subject: R600/structurizer: add class to find the Nearest Common Dominator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175345 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 66 ++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index c4c9762..86cb04a 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -39,6 +39,7 @@ typedef SmallVector BBValueVector; typedef SmallPtrSet BBSet; typedef DenseMap PhiMap; +typedef DenseMap DTN2UnsignedMap; typedef DenseMap BBPhiMap; typedef DenseMap BBPredicates; typedef DenseMap PredMap; @@ -48,6 +49,71 @@ typedef DenseMap BB2BBVecMap; static const char *FlowBlockName = "Flow"; +/// @brief Find the nearest common dominator for multiple BasicBlocks +/// +/// Helper class for AMDGPUStructurizeCFG +/// TODO: Maybe move into common code +class NearestCommonDominator { + + DominatorTree *DT; + + DTN2UnsignedMap IndexMap; + + BasicBlock *Result; + unsigned ResultIndex; + bool ExplicitMentioned; + +public: + /// \brief Start a new query + NearestCommonDominator(DominatorTree *DomTree) { + DT = DomTree; + Result = 0; + } + + /// \brief Add BB to the resulting dominator + void addBlock(BasicBlock *BB, bool Remember = true) { + + DomTreeNode *Node = DT->getNode(BB); + + if (Result == 0) { + unsigned Numbering = 0; + for (;Node;Node = Node->getIDom()) + IndexMap[Node] = ++Numbering; + Result = BB; + ResultIndex = 1; + ExplicitMentioned = Remember; + return; + } + + for (;Node;Node = Node->getIDom()) + if (IndexMap.count(Node)) + break; + else + IndexMap[Node] = 0; + + assert(Node && "Dominator tree invalid!"); + + unsigned Numbering = IndexMap[Node]; 
+ if (Numbering > ResultIndex) { + Result = Node->getBlock(); + ResultIndex = Numbering; + ExplicitMentioned = Remember && (Result == BB); + } else if (Numbering == ResultIndex) { + ExplicitMentioned |= Remember; + } + } + + /// \brief Is "Result" one of the BBs added with "Remember" = True? + bool wasResultExplicitMentioned() { + return ExplicitMentioned; + } + + /// \brief Get the query result + BasicBlock *getResult() { + return Result; + } +}; + /// @brief Transforms the control flow graph on one single entry/exit region /// at a time. /// -- cgit v1.1 From 4c79c71d99336baf312e2784311b257c5d9eebce Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:27:35 +0000 Subject: R600/structurizer: improve PHI value finding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the new NearestCommonDominator class. This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175346 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 86cb04a..3801347 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -533,12 +533,18 @@ void AMDGPUStructurizeCFG::setPhiValues() { Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); Updater.AddAvailableValue(To, Undef); + NearestCommonDominator Dominator(DT); + Dominator.addBlock(To, false); for (BBValueVector::iterator VI = PI->second.begin(), VE = PI->second.end(); VI != VE; ++VI) { Updater.AddAvailableValue(VI->first, VI->second); + Dominator.addBlock(VI->first); } + if (!Dominator.wasResultExplicitMentioned()) + Updater.AddAvailableValue(Dominator.getResult(), Undef); + for (BBVector::iterator FI = From.begin(), FE = From.end(); FI != FE; ++FI) { -- cgit v1.1 From 25bd884c3d0e9803dfafda10e7ecede152ad156f Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:27:40 +0000 Subject: R600/structurizer: improve finding condition values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the new NearestCommonDominator class. This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175347 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 3801347..e97e049 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -458,28 +458,41 @@ void AMDGPUStructurizeCFG::insertConditions() { assert(Term->isConditional()); + Value *Default = (Parent == LoopEnd) ? BoolTrue : BoolFalse; + PhiInserter.Initialize(Boolean, ""); - if (Parent == LoopEnd) { + PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); + if (Parent == LoopEnd) PhiInserter.AddAvailableValue(LoopStart, BoolTrue); - } else { - PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); + else PhiInserter.AddAvailableValue(Parent, BoolFalse); - } - bool ParentHasValue = false; BasicBlock *Succ = Term->getSuccessor(0); BBPredicates &Preds = (Parent == LoopEnd) ? 
LoopPred : Predicates[Succ]; + + NearestCommonDominator Dominator(DT); + Dominator.addBlock(Parent, false); + + Value *ParentValue = 0; for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { + if (PI->first == Parent) { + ParentValue = PI->second; + break; + } PhiInserter.AddAvailableValue(PI->first, PI->second); - ParentHasValue |= PI->first == Parent; + Dominator.addBlock(PI->first); } - if (ParentHasValue) - Term->setCondition(PhiInserter.GetValueAtEndOfBlock(Parent)); - else + if (ParentValue) { + Term->setCondition(ParentValue); + } else { + if (!Dominator.wasResultExplicitMentioned()) + PhiInserter.AddAvailableValue(Dominator.getResult(), Default); + Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + } } } -- cgit v1.1 From 623977d9ba064a6f3b46edee1cb2246716a33397 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:27:45 +0000 Subject: R600/structurizer: improve loop handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate more than one loop if it seems to make sense. This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175348 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 344 +++++++++++++------------------ 1 file changed, 148 insertions(+), 196 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index e97e049..c2b084a 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -43,6 +43,7 @@ typedef DenseMap DTN2UnsignedMap; typedef DenseMap BBPhiMap; typedef DenseMap BBPredicates; typedef DenseMap PredMap; +typedef DenseMap BB2BBMap; typedef DenseMap BB2BBVecMap; // The name for newly created blocks. 
@@ -175,29 +176,30 @@ class AMDGPUStructurizeCFG : public RegionPass { RNVector Order; BBSet Visited; - PredMap Predicates; + BBPhiMap DeletedPhis; BB2BBVecMap AddedPhis; + + PredMap Predicates; BranchVector Conditions; - BasicBlock *LoopStart; - BasicBlock *LoopEnd; - BBSet LoopTargets; - BBPredicates LoopPred; + BB2BBMap Loops; + PredMap LoopPreds; + BranchVector LoopConds; - void orderNodes(); + RegionNode *PrevNode; - Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); + void orderNodes(); - bool analyzeLoopStart(BasicBlock *From, BasicBlock *To, Value *Condition); + void analyzeLoops(RegionNode *N); - void analyzeNode(RegionNode *N); + Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); - void analyzeLoopEnd(RegionNode *N); + void gatherPredicates(RegionNode *N); void collectInfos(); - void insertConditions(); + void insertConditions(bool Loops); void delPhiValues(BasicBlock *From, BasicBlock *To); @@ -212,17 +214,19 @@ class AMDGPUStructurizeCFG : public RegionPass { BasicBlock *getNextFlow(BasicBlock *Dominator); - BasicBlock *needPrefix(RegionNode *&Prev, RegionNode *Node); + BasicBlock *needPrefix(bool NeedEmpty); BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); - RegionNode *getNextPrev(BasicBlock *Next); + void setPrevNode(BasicBlock *BB); bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); - bool isPredictableTrue(RegionNode *Who, RegionNode *Where); + bool isPredictableTrue(RegionNode *Node); + + void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); - RegionNode *wireFlow(RegionNode *&Prev, bool ExitUseAllowed); + void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); void createFlow(); @@ -278,6 +282,29 @@ void AMDGPUStructurizeCFG::orderNodes() { } } +/// \brief Determine the end of the loops +void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) { + + if (N->isSubRegion()) { + // Test for exit as back edge + BasicBlock *Exit = N->getNodeAs()->getExit(); + if (Visited.count(Exit)) + Loops[Exit] = N->getEntry(); + + } else { + // Test for sucessors as back edge + BasicBlock *BB = N->getNodeAs(); + BranchInst *Term = cast(BB->getTerminator()); + + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); + + if (Visited.count(Succ)) + Loops[Succ] = BB; + } + } +} + /// \brief Build the condition for one edge Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, bool Invert) { @@ -291,54 +318,20 @@ Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, return Cond; } -/// \brief Analyze the start of a loop and insert predicates as necessary -bool AMDGPUStructurizeCFG::analyzeLoopStart(BasicBlock *From, BasicBlock *To, - Value *Condition) { - LoopPred[From] = Condition; - LoopTargets.insert(To); - if (!LoopStart) { - LoopStart = To; - return true; - - } else if (LoopStart == To) - return true; - - // We need to handle the case of intersecting loops, e. g. 
- // - // /----<----- - // | | - // -> A -> B -> C -> D - // | | - // -----<----/ - - RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); - - for (;OI != OE; ++OI) - if ((*OI)->getEntry() == LoopStart) - break; - - for (;OI != OE && (*OI)->getEntry() != To; ++OI) { - BBPredicates &Pred = Predicates[(*OI)->getEntry()]; - if (!Pred.count(From)) - Pred[From] = Condition; - } - return false; -} - /// \brief Analyze the predecessors of each block and build up predicates -void AMDGPUStructurizeCFG::analyzeNode(RegionNode *N) { +void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) { + RegionInfo *RI = ParentRegion->getRegionInfo(); BasicBlock *BB = N->getEntry(); BBPredicates &Pred = Predicates[BB]; + BBPredicates &LPred = LoopPreds[BB]; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - if (!ParentRegion->contains(*PI)) { - // It's a branch from outside into our region entry - Pred[*PI] = BoolTrue; + // Ignore it if it's a branch from outside into our region entry + if (!ParentRegion->contains(*PI)) continue; - } Region *R = RI->getRegionFor(*PI); if (R == ParentRegion) { @@ -355,7 +348,7 @@ void AMDGPUStructurizeCFG::analyzeNode(RegionNode *N) { if (Term->isConditional()) { // Try to treat it like an ELSE block BasicBlock *Other = Term->getSuccessor(!i); - if (Visited.count(Other) && !LoopTargets.count(Other) && + if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(*PI)) { Pred[Other] = BoolFalse; @@ -363,13 +356,12 @@ void AMDGPUStructurizeCFG::analyzeNode(RegionNode *N) { continue; } } - + Pred[*PI] = buildCondition(Term, i, false); + } else { // Back edge - if (analyzeLoopStart(*PI, BB, buildCondition(Term, i, true))) - continue; + LPred[*PI] = buildCondition(Term, i, true); } - Pred[*PI] = buildCondition(Term, i, false); } } else { @@ -383,34 +375,10 @@ void AMDGPUStructurizeCFG::analyzeNode(RegionNode *N) { continue; BasicBlock *Entry = R->getEntry(); - if (!Visited.count(Entry)) - if (analyzeLoopStart(Entry, BB, BoolFalse)) - continue; - - Pred[Entry] = BoolTrue; - } - } -} - -/// \brief Determine the end of the loop -void AMDGPUStructurizeCFG::analyzeLoopEnd(RegionNode *N) { - - if (N->isSubRegion()) { - // Test for exit as back edge - BasicBlock *Exit = N->getNodeAs()->getExit(); - if (Visited.count(Exit)) - LoopEnd = N->getEntry(); - - } else { - // Test for sucessors as back edge - BasicBlock *BB = N->getNodeAs(); - BranchInst *Term = cast(BB->getTerminator()); - - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - - if (Visited.count(Succ)) - LoopEnd = BB; + if (Visited.count(Entry)) + Pred[Entry] = BoolTrue; + else + LPred[Entry] = BoolFalse; } } } @@ -422,9 +390,8 @@ void AMDGPUStructurizeCFG::collectInfos() { Predicates.clear(); // and loop infos - LoopStart = LoopEnd = 0; - LoopTargets.clear(); - LoopPred.clear(); + Loops.clear(); + LoopPreds.clear(); // Reset the visited nodes Visited.clear(); @@ -433,42 +400,37 @@ void AMDGPUStructurizeCFG::collectInfos() { OI != OE; ++OI) { // Analyze all the conditions leading to a node - analyzeNode(*OI); + gatherPredicates(*OI); // Remember that we've seen this node Visited.insert((*OI)->getEntry()); - // Find the last back edge - analyzeLoopEnd(*OI); + // Find the last back edges + analyzeLoops(*OI); } - - // Both or neither must be set - assert(!LoopStart == !LoopEnd); } /// \brief Insert the missing branch conditions -void AMDGPUStructurizeCFG::insertConditions() { +void 
AMDGPUStructurizeCFG::insertConditions(bool Loops) { + BranchVector &Conds = Loops ? LoopConds : Conditions; + Value *Default = Loops ? BoolTrue : BoolFalse; SSAUpdater PhiInserter; - for (BranchVector::iterator I = Conditions.begin(), - E = Conditions.end(); I != E; ++I) { + for (BranchVector::iterator I = Conds.begin(), + E = Conds.end(); I != E; ++I) { BranchInst *Term = *I; - BasicBlock *Parent = Term->getParent(); - assert(Term->isConditional()); - Value *Default = (Parent == LoopEnd) ? BoolTrue : BoolFalse; + BasicBlock *Parent = Term->getParent(); + BasicBlock *SuccTrue = Term->getSuccessor(0); + BasicBlock *SuccFalse = Term->getSuccessor(1); PhiInserter.Initialize(Boolean, ""); PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); - if (Parent == LoopEnd) - PhiInserter.AddAvailableValue(LoopStart, BoolTrue); - else - PhiInserter.AddAvailableValue(Parent, BoolFalse); + PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); - BasicBlock *Succ = Term->getSuccessor(0); - BBPredicates &Preds = (Parent == LoopEnd) ? LoopPred : Predicates[Succ]; + BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; NearestCommonDominator Dominator(DT); Dominator.addBlock(Parent, false); @@ -648,54 +610,24 @@ BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) { } /// \brief Create a new or reuse the previous node as flow node -BasicBlock *AMDGPUStructurizeCFG::needPrefix(RegionNode *&Prev, - RegionNode *Node) { - - if (!Prev || Prev->isSubRegion() || - (Node && Node->getEntry() == LoopStart)) { - - // We need to insert a flow node, first figure out the dominator - DomTreeNode *Dominator = Prev ? DT->getNode(Prev->getEntry()) : 0; - if (!Dominator) - Dominator = DT->getNode(Node->getEntry())->getIDom(); - assert(Dominator && "Illegal loop to function entry"); - - // then create the flow node - BasicBlock *Flow = getNextFlow(Dominator->getBlock()); - - // wire up the new flow - if (Prev) { - changeExit(Prev, Flow, true); - } else { - // Parent regions entry needs predicates, create a new region entry - BasicBlock *Entry = Node->getEntry(); - for (pred_iterator I = pred_begin(Entry), E = pred_end(Entry); - I != E;) { - - BasicBlock *BB = *(I++); - if (ParentRegion->contains(BB)) - continue; +BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) { - // Remove PHY values from outside to our entry node - delPhiValues(BB, Entry); + BasicBlock *Entry = PrevNode->getEntry(); - // Update the branch instructions - BB->getTerminator()->replaceUsesOfWith(Entry, Flow); - } + if (!PrevNode->isSubRegion()) { + killTerminator(Entry); + if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) + return Entry; - // Populate the region tree with the new entry - for (Region *R = ParentRegion; R && R->getEntry() == Entry; - R = R->getParent()) { - R->replaceEntry(Flow); - } - } - Prev = ParentRegion->getBBNode(Flow); + } - } else { - killTerminator(Prev->getEntry()); - } + // create a new flow node + BasicBlock *Flow = getNextFlow(Entry); - return Prev->getEntry(); + // and wire it up + changeExit(PrevNode, Flow, true); + PrevNode = ParentRegion->getBBNode(Flow); + return Flow; } /// \brief Returns the region exit if possible, otherwise just a new flow node @@ -711,9 +643,9 @@ BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow, return getNextFlow(Flow); } -/// \brief Returns the region node for Netx, or null if Next is the exit -RegionNode *AMDGPUStructurizeCFG::getNextPrev(BasicBlock *Next) { - return ParentRegion->contains(Next) ? 
ParentRegion->getBBNode(Next) : 0; +/// \brief Set the previous node +void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) { + PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; } /// \brief Does BB dominate all the predicates of Node ? @@ -729,11 +661,14 @@ bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) } /// \brief Can we predict that this node will always be called? -bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Who, - RegionNode *Where) { +bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) { - BBPredicates &Preds = Predicates[Who->getEntry()]; - bool Dominated = Where == 0; + BBPredicates &Preds = Predicates[Node->getEntry()]; + bool Dominated = false; + + // Regionentry is always true + if (PrevNode == 0) + return true; for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { @@ -741,7 +676,7 @@ bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Who, if (I->second != BoolTrue) return false; - if (!Dominated && DT->dominates(I->first, Where->getEntry())) + if (!Dominated && DT->dominates(I->first, PrevNode->getEntry())) Dominated = true; } @@ -750,45 +685,69 @@ bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Who, } /// Take one node from the order vector and wire it up -RegionNode *AMDGPUStructurizeCFG::wireFlow(RegionNode *&Prev, - bool ExitUseAllowed) { +void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed, + BasicBlock *LoopEnd) { RegionNode *Node = Order.pop_back_val(); + Visited.insert(Node->getEntry()); - if (isPredictableTrue(Node, Prev)) { + if (isPredictableTrue(Node)) { // Just a linear flow - if (Prev) { - changeExit(Prev, Node->getEntry(), true); + if (PrevNode) { + changeExit(PrevNode, Node->getEntry(), true); } - Prev = Node; + PrevNode = Node; } else { // Insert extra prefix node (or reuse last one) - BasicBlock *Flow = needPrefix(Prev, Node); - if (Node->getEntry() == LoopStart) - LoopStart = Flow; + BasicBlock *Flow = needPrefix(false); // Insert extra postfix node (or use exit instead) BasicBlock *Entry = Node->getEntry(); - BasicBlock *Next = needPostfix(Flow, ExitUseAllowed && Entry != LoopEnd); + BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); // let it point to entry and next block Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); addPhiValues(Flow, Entry); DT->changeImmediateDominator(Entry, Flow); - Prev = Node; - while (!Order.empty() && Node->getEntry() != LoopEnd && - !LoopTargets.count(Order.back()->getEntry()) && + PrevNode = Node; + while (!Order.empty() && !Visited.count(LoopEnd) && dominatesPredicates(Entry, Order.back())) { - Node = wireFlow(Prev, false); + handleLoops(false, LoopEnd); } - changeExit(Prev, Next, false); - Prev = getNextPrev(Next); + changeExit(PrevNode, Next, false); + setPrevNode(Next); } +} - return Node; +void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed, + BasicBlock *LoopEnd) { + RegionNode *Node = Order.back(); + BasicBlock *LoopStart = Node->getEntry(); + + if (!Loops.count(LoopStart)) { + wireFlow(ExitUseAllowed, LoopEnd); + return; + } + + if (!isPredictableTrue(Node)) + LoopStart = needPrefix(true); + + LoopEnd = Loops[Node->getEntry()]; + wireFlow(false, LoopEnd); + while (!Visited.count(LoopEnd)) { + handleLoops(false, LoopEnd); + } + + // Create an extra loop end node + LoopEnd = needPrefix(false); + BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); + LoopConds.push_back(BranchInst::Create(Next, LoopStart, + BoolUndef, LoopEnd)); + addPhiValues(LoopEnd, 
LoopStart); + setPrevNode(Next); } /// After this function control flow looks like it should be, but @@ -801,26 +760,17 @@ void AMDGPUStructurizeCFG::createFlow() { DeletedPhis.clear(); AddedPhis.clear(); Conditions.clear(); + LoopConds.clear(); - RegionNode *Prev = 0; - while (!Order.empty()) { - - RegionNode *Node = wireFlow(Prev, EntryDominatesExit); - - // Create an extra loop end node - if (Node->getEntry() == LoopEnd) { - LoopEnd = needPrefix(Prev, 0); - BasicBlock *Next = needPostfix(LoopEnd, EntryDominatesExit); + PrevNode = 0; + Visited.clear(); - Conditions.push_back(BranchInst::Create(Next, LoopStart, - BoolUndef, LoopEnd)); - addPhiValues(LoopEnd, LoopStart); - Prev = getNextPrev(Next); - } + while (!Order.empty()) { + handleLoops(EntryDominatesExit, 0); } - if (Prev) - changeExit(Prev, Exit, EntryDominatesExit); + if (PrevNode) + changeExit(PrevNode, Exit, EntryDominatesExit); else assert(EntryDominatesExit); } @@ -880,19 +830,21 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { orderNodes(); collectInfos(); createFlow(); - insertConditions(); + insertConditions(false); + insertConditions(true); setPhiValues(); rebuildSSA(); // Cleanup Order.clear(); Visited.clear(); - Predicates.clear(); DeletedPhis.clear(); AddedPhis.clear(); + Predicates.clear(); Conditions.clear(); - LoopTargets.clear(); - LoopPred.clear(); + Loops.clear(); + LoopPreds.clear(); + LoopConds.clear(); return true; } -- cgit v1.1 From ef6b24856d39ca69381d445c8363a86f5e0945db Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:27:50 +0000 Subject: R600/structurizer: improve inverting conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop adding more instructions than necessary. This is a candidate for the stable branch. 
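The core of the change is a reuse-before-create pattern: before emitting a new xor-with-true, first strip an existing negation, then look for an already-materialized inversion in the same block. A minimal standalone sketch of that pattern (invertCondition is a hypothetical free function; the patch implements this as AMDGPUStructurizeCFG::invert in the diff below, which additionally folds the constant true/false/undef cases):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper, simplified from the patch: return a value equal to
// !Condition, creating a new instruction only as a last resort. Assumes
// Condition is a non-constant instruction (the real code checks constants
// first).
static Value *invertCondition(Value *Condition) {
  // A "not" inverts to its own operand, so no new instruction is needed.
  Value *Inner;
  if (match(Condition, m_Not(m_Value(Inner))))
    return Inner;

  // Reuse an existing "not Condition" from the same basic block.
  BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
  for (Value::use_iterator I = Condition->use_begin(),
       E = Condition->use_end(); I != E; ++I) {
    Instruction *User = dyn_cast<Instruction>(*I);
    if (User && User->getParent() == Parent &&
        match(User, m_Not(m_Specific(Condition))))
      return User;
  }

  // Only now create a new instruction.
  return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
}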
Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175349 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index c2b084a..26f842e 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -22,8 +22,10 @@ #include "llvm/Analysis/RegionPass.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace llvm::PatternMatch; namespace { @@ -193,6 +195,8 @@ class AMDGPUStructurizeCFG : public RegionPass { void analyzeLoops(RegionNode *N); + Value *invert(Value *Condition); + Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); void gatherPredicates(RegionNode *N); @@ -305,6 +309,40 @@ void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) { } } +/// \brief Invert the given condition +Value *AMDGPUStructurizeCFG::invert(Value *Condition) { + + // First: Check if it's a constant + if (Condition == BoolTrue) + return BoolFalse; + + if (Condition == BoolFalse) + return BoolTrue; + + if (Condition == BoolUndef) + return BoolUndef; + + // Second: If the condition is already inverted, return the original value + if (match(Condition, m_Not(m_Value(Condition)))) + return Condition; + + // Third: Check all the users for an invert + BasicBlock *Parent = cast(Condition)->getParent(); + for (Value::use_iterator I = Condition->use_begin(), + E = Condition->use_end(); I != E; ++I) { + + Instruction *User = dyn_cast(*I); + if (!User || User->getParent() != Parent) + continue; + + if (match(*I, m_Not(m_Specific(Condition)))) + return *I; + } + + // Last option: Create a new instruction + return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); +} + /// \brief Build the condition for one edge Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, bool Invert) { @@ -313,7 +351,7 @@ Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, Cond = Term->getCondition(); if (Idx != Invert) - Cond = BinaryOperator::CreateNot(Cond, "", Term); + Cond = invert(Cond); } return Cond; } -- cgit v1.1 From 0432d7964f04875b8ce0662df8579b7f6c537e13 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:27:56 +0000 Subject: R600/SI: remove some more unused code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCodeEmitter.h | 45 ---------------------- lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 7 ---- 2 files changed, 52 deletions(-) delete mode 100644 lib/Target/R600/AMDGPUCodeEmitter.h (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUCodeEmitter.h b/lib/Target/R600/AMDGPUCodeEmitter.h deleted file mode 100644 index 5d61cd0..0000000 --- a/lib/Target/R600/AMDGPUCodeEmitter.h +++ /dev/null @@ -1,45 +0,0 @@ -//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief CodeEmitter interface for R600 and SI codegen. -// -//===----------------------------------------------------------------------===// - -#ifndef AMDGPUCODEEMITTER_H -#define AMDGPUCODEEMITTER_H - -namespace llvm { - -class AMDGPUCodeEmitter { -public: - uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - virtual uint64_t getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { return 0; } - virtual unsigned GPR4AlignEncode(const MachineInstr &MI, - unsigned OpNo) const { - return 0; - } - virtual unsigned GPR2AlignEncode(const MachineInstr &MI, - unsigned OpNo) const { - return 0; - } - virtual uint64_t VOPPostEncode(const MachineInstr &MI, - uint64_t Value) const { - return Value; - } - virtual uint64_t i32LiteralEncode(const MachineInstr &MI, - unsigned OpNo) const { - return 0; - } -}; - -} // End namespace llvm - -#endif // AMDGPUCODEEMITTER_H diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h index 3b3816a..8721f80 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -42,13 +42,6 @@ public: SmallVectorImpl &Fixups) const { return 0; } - virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const { - return Value; - } - virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { - return 0; - } }; } // End namespace llvm -- cgit v1.1 From 7c52866a14e0c928e9be020b9dc8e585f0965212 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:28:02 +0000 Subject: R600/SI: move *_Helper definitions to SIInstrFormat.td MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a candidate for the stable branch. 
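One detail worth noting in the moved definitions (visible in the diff below): SMRD_Helper instantiates every scalar memory read twice, an _IMM form whose offset is encoded in the instruction and an _SGPR form that takes the offset from a scalar register. A hedged C++ sketch of the choice this pair implies; the function name and the 8-bit dword-offset limit are assumptions about the SI encoding, not code from this patch:

// Assumption: the SMRD immediate form encodes the offset in dwords in an
// 8-bit field, so a byte offset qualifies only if it is 4-byte aligned and
// its dword value fits in 8 bits.
static bool canUseSMRDImmOffset(unsigned long long ByteOffset) {
  return (ByteOffset % 4) == 0 && (ByteOffset / 4) <= 0xFFu;
}

A selector would pick the _IMM variant when this returns true, and otherwise materialize the offset into an SGPR and use the _SGPR variant.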
Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175351 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrFormats.td | 66 +++++++++++++++++++++++++++++++++++++++ lib/Target/R600/SIInstrInfo.td | 66 --------------------------------------- 2 files changed, 66 insertions(+), 66 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index aea3b5a..7040115 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -144,3 +144,69 @@ class SOPC_32 op, string opName, list pattern> class SOPC_64 op, string opName, list pattern> : SOPC ; +class MIMG_Load_Helper op, string asm> : MIMG < + op, + (outs VReg_128:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, + GPR4Align:$srsrc, GPR4Align:$ssamp), + asm, + []> { + let mayLoad = 1; + let mayStore = 0; +} + +class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < + op, + (outs), + (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, + i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, + GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), + asm, + []> { + let mayStore = 1; + let mayLoad = 0; +} + +class MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF < + op, + (outs regClass:$dst), + (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, + i1imm:$tfe, SReg_32:$soffset), + asm, + []> { + let mayLoad = 1; + let mayStore = 0; +} + +class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < + op, + (outs regClass:$dst), + (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, + i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), + asm, + []> { + let mayLoad = 1; + let mayStore = 0; +} + +multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { + def _IMM : SMRD < + op, 1, + (outs dstClass:$dst), + (ins GPR2Align:$sbase, i32imm:$offset), + asm, + [] + >; + + def _SGPR : SMRD < + op, 0, + (outs dstClass:$dst), + (ins GPR2Align:$sbase, SReg_32:$soff), + asm, + [] + >; +} + diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index b983e8a..aa156f3 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -484,71 +484,5 @@ class VOPC op, dag ins, string asm, list pattern> : } // End Uses = [EXEC] -class MIMG_Load_Helper op, string asm> : MIMG < - op, - (outs VReg_128:$vdata), - (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, - GPR4Align:$srsrc, GPR4Align:$ssamp), - asm, - []> { - let mayLoad = 1; - let mayStore = 0; -} - -class MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF < - op, - (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, - i1imm:$tfe, SReg_32:$soffset), - asm, - []> { - let mayLoad = 1; - let mayStore = 0; -} - -class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < - op, - (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, - i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), - 
asm, - []> { - let mayLoad = 1; - let mayStore = 0; -} - -class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < - op, - (outs), - (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), - asm, - []> { - let mayStore = 1; - let mayLoad = 0; -} - -multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { - def _IMM : SMRD < - op, 1, - (outs dstClass:$dst), - (ins GPR2Align:$sbase, i32imm:$offset), - asm, - [] - >; - - def _SGPR : SMRD < - op, 0, - (outs dstClass:$dst), - (ins GPR2Align:$sbase, SReg_32:$soff), - asm, - [] - >; -} - include "SIInstrFormats.td" include "SIInstructions.td" -- cgit v1.1 From 305fefbb65c3df7bf5b3a8f6157efe24652c1e56 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:28:07 +0000 Subject: R600/SI: fix VOPC encoding v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously it only worked by coincidence. v2: fix 64bit versions, use 0x80 (inline 0) instead of SGPR0 for the unused SRC2 This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrFormats.td | 53 ++++++++++++--------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 7040115..bd31bc1 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -21,26 +21,12 @@ // //===----------------------------------------------------------------------===// -class VOP3b_2IN op, string opName, RegisterClass dstClass, - RegisterClass src0Class, RegisterClass src1Class, - list pattern> - : VOP3b ; - - -class VOP3_1_32 op, string opName, list pattern> - : VOP3b_2IN ; - class VOP3_32 op, string opName, list pattern> : VOP3 ; class VOP3_64 op, string opName, list pattern> : VOP3 ; - class SOP1_32 op, string opName, list pattern> : SOP1 ; @@ -109,34 +95,27 @@ class SOPK_32 op, string opName, list pattern> class SOPK_64 op, string opName, list pattern> : SOPK ; -class VOPC_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> : - VOPC < - op, (ins arc:$src0, vrc:$src1), opName, pattern - >; - -multiclass VOPC_32 op, string opName, list pattern> { - - def _e32 : VOPC_Helper < - {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - VReg_32, AllReg_32, opName, pattern - >; +multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, + string opName, list pattern> { - def _e64 : VOP3_1_32 < - op, + def _e32 : VOPC ; + def _e64 : VOP3 < + {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs SReg_1:$dst), + (ins arc:$src0, vrc:$src1, + InstFlag:$abs, InstFlag:$clamp, + InstFlag:$omod, InstFlag:$neg), opName, pattern - >; + > { + let SRC2 = 0x80; + } } -multiclass VOPC_64 op, string opName, list pattern> { +multiclass VOPC_32 op, string opName, list pattern> + : VOPC_Helper ; - def _e32 : VOPC_Helper ; - - def _e64 : VOP3_64 < - {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} +multiclass VOPC_64 op, string opName, list pattern> + : VOPC_Helper ; class SOPC_32 op, string opName, list pattern> : SOPC ; -- cgit v1.1 From 8e4eebcecf291386a321d0f8582b8a57841ea8c9 Mon Sep 17 00:00:00 2001 From:
Christian Konig Date: Sat, 16 Feb 2013 11:28:13 +0000 Subject: R600/SI: replace AllReg_* with [SV]Src_* v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark all the operands that can also have an immediate. v2: SOFFSET is also an SSrc_32 operand This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175353 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrFormats.td | 36 +++++------ lib/Target/R600/SIInstructions.td | 128 +++++++++++++++++++------------------- lib/Target/R600/SIRegisterInfo.td | 10 ++- 3 files changed, 89 insertions(+), 85 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index bd31bc1..5c69c15 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -22,25 +22,25 @@ //===----------------------------------------------------------------------===// class VOP3_32 op, string opName, list pattern> - : VOP3 ; + : VOP3 ; class VOP3_64 op, string opName, list pattern> - : VOP3 ; + : VOP3 ; class SOP1_32 op, string opName, list pattern> - : SOP1 ; + : SOP1 ; class SOP1_64 op, string opName, list pattern> - : SOP1 ; + : SOP1 ; class SOP2_32 op, string opName, list pattern> - : SOP2 ; + : SOP2 ; class SOP2_64 op, string opName, list pattern> - : SOP2 ; + : SOP2 ; class SOP2_VCC op, string opName, list pattern> - : SOP2 ; + : SOP2 ; class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, string opName, list pattern> : @@ -49,7 +49,7 @@ class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, >; multiclass VOP1_32 op, string opName, list pattern> { - def _e32: VOP1_Helper ; + def _e32: VOP1_Helper ; def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, opName, [] >; @@ -57,7 +57,7 @@ multiclass VOP1_32 op, string opName, list pattern> { multiclass VOP1_64 op, string opName, list pattern> { - def _e32 : VOP1_Helper ; + def _e32 : VOP1_Helper ; def _e64 : VOP3_64 < {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -73,7 +73,7 @@ class VOP2_Helper op, RegisterClass vrc, RegisterClass arc, multiclass VOP2_32 op, string opName, list pattern> { - def _e32 : VOP2_Helper ; + def _e32 : VOP2_Helper ; def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, opName, [] @@ -81,7 +81,7 @@ multiclass VOP2_32 op, string opName, list pattern> { } multiclass VOP2_64 op, string opName, list pattern> { - def _e32: VOP2_Helper ; + def _e32: VOP2_Helper ; def _e64 : VOP3_64 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -112,16 +112,16 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, } multiclass VOPC_32 op, string opName, list pattern> - : VOPC_Helper ; + : VOPC_Helper ; multiclass VOPC_64 op, string opName, list pattern> - : VOPC_Helper ; + : VOPC_Helper ; class SOPC_32 op, string opName, list pattern> - : SOPC ; + : SOPC ; class SOPC_64 op, string opName, list pattern> - : SOPC ; + : SOPC ; class MIMG_Load_Helper op, string asm> : MIMG < op, @@ -140,7 +140,7 @@ class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBU (outs), (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), + GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm, []> { let mayStore = 1; @@ -152,7 +152,7 @@ class 
MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF (outs regClass:$dst), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, - i1imm:$tfe, SReg_32:$soffset), + i1imm:$tfe, SSrc_32:$soffset), asm, []> { let mayLoad = 1; @@ -164,7 +164,7 @@ class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF (outs regClass:$dst), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, - i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), + i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm, []> { let mayLoad = 1; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b1533bd..16c9c01 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -130,33 +130,33 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), - (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_LT)), + (V_CMP_LT_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), - (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_EQ)), + (V_CMP_EQ_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), - (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_LE)), + (V_CMP_LE_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), - (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_GT)), + (V_CMP_GT_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_NE)), + (V_CMP_LG_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), - (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_GE)), + (V_CMP_GE_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>; @@ -166,8 +166,8 @@ defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>; def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_NE)), + (V_CMP_NEQ_F32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; @@ -300,33 
+300,33 @@ defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>; def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), - (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_LT)), + (V_CMP_LT_I32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>; def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), - (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_EQ)), + (V_CMP_EQ_I32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>; def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), - (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_LE)), + (V_CMP_LE_I32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>; def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), - (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_GT)), + (V_CMP_GT_I32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>; def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_NE)), + (V_CMP_NE_I32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>; def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), - (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1) + (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_GE)), + (V_CMP_GE_I32_e64 VSrc_32:$src0, VReg_32:$src1) >; defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; @@ -594,12 +594,12 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", - [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))] + [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))] >; //defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; //defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", - [(set (i32 VReg_32:$dst), (fp_to_sint AllReg_32:$src0))] + [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; @@ -616,33 +616,33 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; //defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; //defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", - [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))] + [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))] >; defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", - [(set VReg_32:$dst, (frint AllReg_32:$src0))] + [(set VReg_32:$dst, (frint VSrc_32:$src0))] >; defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", - 
[(set VReg_32:$dst, (ffloor AllReg_32:$src0))] + [(set VReg_32:$dst, (ffloor VSrc_32:$src0))] >; defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", - [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] + [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))] >; defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", - [(set VReg_32:$dst, (flog2 AllReg_32:$src0))] + [(set VReg_32:$dst, (flog2 VSrc_32:$src0))] >; defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))] + [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))] + [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; @@ -774,7 +774,7 @@ def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), - (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32", + (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32", [] >{ let DisableEncoding = "$vcc"; @@ -797,35 +797,35 @@ defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>; def : Pat < - (f32 (fadd AllReg_32:$src0, VReg_32:$src1)), - (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1) + (f32 (fadd VSrc_32:$src0, VReg_32:$src1)), + (V_ADD_F32_e32 VSrc_32:$src0, VReg_32:$src1) >; defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>; def : Pat < - (f32 (fsub AllReg_32:$src0, VReg_32:$src1)), - (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1) + (f32 (fsub VSrc_32:$src0, VReg_32:$src1)), + (V_SUB_F32_e32 VSrc_32:$src0, VReg_32:$src1) >; defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; defm V_MUL_LEGACY_F32 : VOP2_32 < 0x00000007, "V_MUL_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))] >; defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", - [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))] >; //defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; //defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))] >; defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))] >; defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; @@ -840,13 +840,13 @@ defm V_ASHRREV_I32 : 
VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", - [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] >; defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", - [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))] >; defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", - [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))] >; defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; @@ -857,10 +857,10 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", - [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] + [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", - [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] + [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; } // End Defs = [VCC] defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; @@ -872,7 +872,7 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", - [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))] >; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; @@ -943,8 +943,8 @@ def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; def : Pat < - (mul AllReg_32:$src0, VReg_32:$src1), - (V_MUL_LO_I32 AllReg_32:$src0, VReg_32:$src1, (IMPLICIT_DEF), 0, 0, 0, 0) + (mul VSrc_32:$src0, VReg_32:$src1), + (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (IMPLICIT_DEF), 0, 0, 0, 0) >; def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; @@ -983,10 +983,10 @@ def : Pat < def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] + [(set SReg_64:$dst, (and SSrc_64:$src0, SSrc_64:$src1))] >; def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", - [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] + [(set SReg_1:$vcc, (SIvcc_and SSrc_64:$src0, SSrc_64:$src1))] >; def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; @@ -1386,23 +1386,23 @@ def : Pat < def : POW_Common ; def : Pat < - (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1), - (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1)) + (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1), + (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1)) >; def : Pat< - 
(fdiv AllReg_32:$src0, AllReg_32:$src1), - (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1)) + (fdiv VSrc_32:$src0, VSrc_32:$src1), + (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1)) >; def : Pat < - (fcos AllReg_32:$src0), - (V_COS_F32_e32 (V_MUL_F32_e32 AllReg_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) + (fcos VSrc_32:$src0), + (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) >; def : Pat < - (fsin AllReg_32:$src0), - (V_SIN_F32_e32 (V_MUL_F32_e32 AllReg_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) + (fsin VSrc_32:$src0), + (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) >; def : Pat < @@ -1430,8 +1430,8 @@ def : Pat < /********** VOP3 Patterns **********/ /********** ================== **********/ -def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)), - (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, +def : Pat <(f32 (IL_mad VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2)), + (V_MAD_LEGACY_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, 0, 0, 0, 0)>; /********** ================== **********/ diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 809d503..150c92e 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -177,10 +177,14 @@ def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; -// AllReg_* - A set of all scalar and vector registers of a given width. -def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>; +// [SV]Src_* operands can have either an immediate or a register +def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>; +def SSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64)>; + +def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; + +def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>; // Special register classes for predicates and the M0 register def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; -- cgit v1.1 From e25e490793241e471036c3e2f969ce6a068e5ce1 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:28:22 +0000 Subject: R600/SI: cleanup literal handling v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems to be a lot simpler, and also paves the way for further improvements. v2: rebased on master, use 0 in BUFFER_LOAD_FORMAT_XYZW, use VGPR0 in dummy EXP, avoid compiler warning, break after encoding the first literal. v3: correctly use V_ADD_F32_e64 This is a candidate for the stable branch.
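In brief, this drops the separate literal-lowering pass and instead classifies each source operand while encoding: a small set of values gets folded into the instruction word as an inline constant, and anything else is emitted as a single trailing 32-bit literal DWORD. A rough stand-alone sketch of that classification (the encoding values and ranges come from the patch below; the function names and scaffolding here are illustrative only, not from the tree):

    #include <cstdint>
    #include <cstdlib>

    // Inline-constant encodings for a 32-bit SI source operand. 255 means
    // "no inline form": one 32-bit literal DWORD then follows the 4-byte
    // instruction word, and only a single such literal is allowed.
    static uint32_t encodeInlineInt(int32_t I) {
      if (I >= 0 && I <= 64)
        return 128 + I;           // small non-negative integers
      if (I >= -16 && I <= -1)
        return 192 + std::abs(I); // small negative integers
      return 255;
    }

    static uint32_t encodeInlineFloat(float F) {
      if (F == 0.5f)  return 240;
      if (F == -0.5f) return 241;
      if (F == 1.0f)  return 242;
      if (F == -1.0f) return 243;
      if (F == 2.0f)  return 244;
      if (F == -2.0f) return 245;
      if (F == 4.0f)  return 246;
      if (F == -4.0f) return 247;
      return 255;
    }

This mirrors the getLitEncoding() helper added in SIMCCodeEmitter.cpp; the emitter deliberately stops after encoding the first literal, since only one is permitted per instruction.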
Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175354 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 - lib/Target/R600/AMDGPUAsmPrinter.cpp | 2 - lib/Target/R600/AMDGPUTargetMachine.cpp | 1 - lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 146 +++++++++++++++++++---- lib/Target/R600/SIISelLowering.cpp | 14 +-- lib/Target/R600/SIInstrInfo.cpp | 5 +- lib/Target/R600/SIInstructions.td | 81 ++++++------- lib/Target/R600/SILowerControlFlow.cpp | 10 +- lib/Target/R600/SILowerLiteralConstants.cpp | 107 ----------------- lib/Target/R600/SIRegisterInfo.td | 4 +- 10 files changed, 176 insertions(+), 195 deletions(-) delete mode 100644 lib/Target/R600/SILowerLiteralConstants.cpp (limited to 'lib') diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index bac01a3..ba87918 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -30,7 +30,6 @@ FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); -FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); FunctionPass *createSIInsertWaits(TargetMachine &tm); // Passes common to R600 and SI diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 89d1307..c30dbe4 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -91,8 +91,6 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { switch (reg) { default: break; case AMDGPU::EXEC: - case AMDGPU::SI_LITERAL_CONSTANT: - case AMDGPU::SREG_LIT_0: case AMDGPU::M0: continue; } diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 821e864..e2f00be 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -145,7 +145,6 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(&FinalizeMachineBundlesID); addPass(createR600LowerConstCopy(*TM)); } else { - addPass(createSILowerLiteralConstantsPass(*TM)); addPass(createSILowerControlFlowPass(*TM)); } diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index fbdf77e..0d9f3d8 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -27,6 +27,13 @@ using namespace llvm; namespace { + +/// \brief Helper type used in encoding +typedef union { + int32_t I; + float F; +} IntFloatUnion; + class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT @@ -35,6 +42,15 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { const MCSubtargetInfo &STI; MCContext &Ctx; + /// \brief Encode a sequence of registers with the correct alignment. + unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; + + /// \brief Can this operand also contain immediate values? 
+ bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; + + /// \brief Encode an fp or int literal + uint32_t getLitEncoding(const MCOperand &MO) const; + public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, const MCSubtargetInfo &sti, MCContext &ctx) @@ -50,11 +66,6 @@ public: virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const; -public: - - /// \brief Encode a sequence of registers with the correct alignment. - unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; - /// \brief Encoding for when 2 consecutive registers are used virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixup) const; @@ -73,39 +84,131 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, return new SIMCCodeEmitter(MCII, MRI, STI, Ctx); } +bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc, + unsigned OpNo) const { + + unsigned RegClass = Desc.OpInfo[OpNo].RegClass; + return (AMDGPU::SSrc_32RegClassID == RegClass) || + (AMDGPU::SSrc_64RegClassID == RegClass) || + (AMDGPU::VSrc_32RegClassID == RegClass) || + (AMDGPU::VSrc_64RegClassID == RegClass); +} + +uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const { + + IntFloatUnion Imm; + if (MO.isImm()) + Imm.I = MO.getImm(); + else if (MO.isFPImm()) + Imm.F = MO.getFPImm(); + else + return ~0; + + if (Imm.I >= 0 && Imm.I <= 64) + return 128 + Imm.I; + + if (Imm.I >= -16 && Imm.I <= -1) + return 192 + abs(Imm.I); + + if (Imm.F == 0.5f) + return 240; + + if (Imm.F == -0.5f) + return 241; + + if (Imm.F == 1.0f) + return 242; + + if (Imm.F == -1.0f) + return 243; + + if (Imm.F == 2.0f) + return 244; + + if (Imm.F == -2.0f) + return 245; + + if (Imm.F == 4.0f) + return 246; + + if (Imm.F == -4.0f) + return 247; + + return 255; +} + void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups) const { + uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups); - unsigned bytes = MCII.get(MI.getOpcode()).getSize(); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + unsigned bytes = Desc.getSize(); + for (unsigned i = 0; i < bytes; i++) { OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); } + + if (bytes > 4) + return; + + // Check for additional literals in SRC0/1/2 (Op 1/2/3) + for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) { + + // Check if this operand should be encoded as [SV]Src + if (!isSrcOperand(Desc, i)) + continue; + + // Is this operand a literal immediate? + const MCOperand &Op = MI.getOperand(i); + if (getLitEncoding(Op) != 255) + continue; + + // Yes!
Encode it + IntFloatUnion Imm; + if (Op.isImm()) + Imm.I = Op.getImm(); + else + Imm.F = Op.getFPImm(); + + for (unsigned j = 0; j < 4; j++) { + OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff)); + } + + // Only one literal value allowed + break; + } } uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { - if (MO.isReg()) { + if (MO.isReg()) return MRI.getEncodingValue(MO.getReg()); - } else if (MO.isImm()) { - return MO.getImm(); - } else if (MO.isFPImm()) { - // XXX: Not all instructions can use inline literals - // XXX: We should make sure this is a 32-bit constant - union { - float F; - uint32_t I; - } Imm; - Imm.F = MO.getFPImm(); - return Imm.I; - } else if (MO.isExpr()) { + + if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(FK_PCRel_4); Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); return 0; - } else{ - llvm_unreachable("Encoding of this operand type is not supported yet."); } + + // Figure out the operand number, needed for isSrcOperand check + unsigned OpNo = 0; + for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) { + if (&MO == &MI.getOperand(OpNo)) + break; + } + + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + if (isSrcOperand(Desc, OpNo)) { + uint32_t Enc = getLitEncoding(MO); + if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4)) + return Enc; + + } else if (MO.isImm()) + return MO.getImm(); + + llvm_unreachable("Encoding of this operand type is not supported yet."); return 0; } @@ -118,6 +221,7 @@ unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg()); return (regCode & 0xff) >> shift; } + unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, unsigned OpNo , SmallVectorImpl &Fixup) const { diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 3919b97..9a8ce80 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -77,8 +77,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) + .addImm(0x80) // SRC1 + .addImm(0x80) // SRC2 .addImm(0) // ABS .addImm(1) // CLAMP .addImm(0) // OMOD @@ -90,8 +90,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) + .addImm(0x80) // SRC1 + .addImm(0x80) // SRC2 .addImm(1) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD @@ -103,8 +103,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) + .addImm(0x80) // SRC1 + .addImm(0x80) // SRC2 .addImm(0) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD @@ -176,7 +176,7 @@ void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32), VCC) - .addReg(AMDGPU::SREG_LIT_0) + .addImm(0) .addOperand(MI->getOperand(1)); BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32)) diff --git 
a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index f6d00f5..4dfd26e 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -68,7 +68,7 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const { - MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc()); + MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc()); MachineInstrBuilder MIB(*MF, MI); MIB.addReg(DstReg, RegState::Define); MIB.addImm(Imm); @@ -84,9 +84,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const { case AMDGPU::S_MOV_B64: case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: - case AMDGPU::V_MOV_IMM_F32: - case AMDGPU::V_MOV_IMM_I32: - case AMDGPU::S_MOV_IMM_I32: return true; } } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 16c9c01..613fe13 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1018,45 +1018,6 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; -class V_MOV_IMM : InstSI < - (outs VReg_32:$dst), - (ins immType:$src0), - "V_MOV_IMM", - [(set VReg_32:$dst, (type immNode:$src0))] ->; - -let isCodeGenOnly = 1, isPseudo = 1 in { - -def V_MOV_IMM_I32 : V_MOV_IMM; -def V_MOV_IMM_F32 : V_MOV_IMM; - -def S_MOV_IMM_I32 : InstSI < - (outs SReg_32:$dst), - (ins i32imm:$src0), - "S_MOV_IMM_I32", - [(set SReg_32:$dst, (imm:$src0))] ->; - -} // End isCodeGenOnly, isPseudo = 1 - -// i64 immediates aren't supported in hardware, split it into two 32bit values -def : Pat < - (i64 imm:$imm), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_MOV_IMM_I32 (LO32 imm:$imm)), sub0), - (S_MOV_IMM_I32 (HI32 imm:$imm)), sub1) ->; - -class SI_LOAD_LITERAL : - Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { - - bits<32> imm; - let Inst{31-0} = imm; -} - -def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL; -def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL; - let isCodeGenOnly = 1, isPseudo = 1 in { def SET_M0 : InstSI < @@ -1173,7 +1134,7 @@ def SI_KILL : InstSI < def : Pat < (int_AMDGPU_kilp), - (SI_KILL (V_MOV_IMM_I32 0xbf800000)) + (SI_KILL (V_MOV_B32_e32 0xbf800000)) >; /* int_SI_vs_load_input */ @@ -1182,7 +1143,7 @@ def : Pat< VReg_32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, VReg_32:$buf_idx_vgpr, SReg_128:$tlst, - 0, 0, (i32 SREG_LIT_0)) + 0, 0, 0) >; /* int_SI_export */ @@ -1319,6 +1280,38 @@ def : Pat < (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) >; +/********** ================== **********/ +/********** Immediate Patterns **********/ +/********** ================== **********/ + +def : Pat < + (i32 imm:$imm), + (V_MOV_B32_e32 imm:$imm) +>; + +def : Pat < + (f32 fpimm:$imm), + (V_MOV_B32_e32 fpimm:$imm) +>; + +def : Pat < + (i32 imm:$imm), + (S_MOV_B32 imm:$imm) +>; + +def : Pat < + (f32 fpimm:$imm), + (S_MOV_B32 fpimm:$imm) +>; + +// i64 immediates aren't supported in hardware, split it into two 32bit values +def : Pat < + (i64 imm:$imm), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0), + (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) +>; + /********** ===================== **********/ /********** Interpolation Patterns **********/ /********** ===================== **********/ @@ -1397,12 +1390,12 @@ def : Pat< def : Pat < (fcos
VSrc_32:$src0), - (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) + (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < (fsin VSrc_32:$src0), - (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_IMM_I32 CONST.TWO_PI_INV))) + (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < @@ -1448,7 +1441,7 @@ multiclass SMRD_Pattern { // 2. Offset loaded in an 32bit SGPR def : Pat < (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)), - (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_IMM_I32 imm:$offset))) + (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset))) >; // 3. No offset at all diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 1b0dbcc..b215aa2 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -158,10 +158,10 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { .addImm(0) .addImm(1) .addImm(1) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0); + .addReg(AMDGPU::VGPR0) + .addReg(AMDGPU::VGPR0) + .addReg(AMDGPU::VGPR0) + .addReg(AMDGPU::VGPR0); // ... and terminate wavefront BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); @@ -296,7 +296,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { // Clear this pixel from the exec mask if the operand is negative BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) - .addReg(AMDGPU::SREG_LIT_0) + .addImm(0) .addOperand(MI.getOperand(0)); MI.eraseFromParent(); diff --git a/lib/Target/R600/SILowerLiteralConstants.cpp b/lib/Target/R600/SILowerLiteralConstants.cpp deleted file mode 100644 index 6f5fd36..0000000 --- a/lib/Target/R600/SILowerLiteralConstants.cpp +++ /dev/null @@ -1,107 +0,0 @@ -//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief This pass performs the following transformation on instructions with -/// literal constants: -/// -/// %VGPR0 = V_MOV_IMM_I32 1 -/// -/// becomes: -/// -/// BUNDLE -/// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT -/// * SI_LOAD_LITERAL 1 -/// -/// The resulting sequence matches exactly how the hardware handles immediate -/// operands, so this transformation greatly simplifies the code generator. -/// -/// Only the *_MOV_IMM_* support immediate operands at the moment, but when -/// support for immediate operands is added to other instructions, they -/// will be lowered here as well. 
-//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineInstrBundle.h" - -using namespace llvm; - -namespace { - -class SILowerLiteralConstantsPass : public MachineFunctionPass { - -private: - static char ID; - const TargetInstrInfo *TII; - -public: - SILowerLiteralConstantsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "SI Lower literal constants pass"; - } -}; - -} // End anonymous namespace - -char SILowerLiteralConstantsPass::ID = 0; - -FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) { - return new SILowerLiteralConstantsPass(tm); -} - -bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next) { - Next = llvm::next(I); - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - default: break; - case AMDGPU::S_MOV_IMM_I32: - case AMDGPU::V_MOV_IMM_F32: - case AMDGPU::V_MOV_IMM_I32: { - unsigned MovOpcode; - unsigned LoadLiteralOpcode; - MachineOperand LiteralOp = MI.getOperand(1); - if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) { - MovOpcode = AMDGPU::V_MOV_B32_e32; - } else { - MovOpcode = AMDGPU::S_MOV_B32; - } - if (LiteralOp.isImm()) { - LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32; - } else { - LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32; - } - MIBundleBuilder Bundle(MBB, I); - Bundle - .append(BuildMI(MF, MBB.findDebugLoc(I), TII->get(MovOpcode), - MI.getOperand(0).getReg()) - .addReg(AMDGPU::SI_LITERAL_CONSTANT)) - .append(BuildMI(MF, MBB.findDebugLoc(I), - TII->get(LoadLiteralOpcode)) - .addOperand(MI.getOperand(1))); - llvm::finalizeBundle(MBB, Bundle.begin()); - MI.eraseFromParent(); - break; - } - } - } - } - return false; -} diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 150c92e..7f1fec0 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -22,8 +22,6 @@ def EXEC_LO : SIReg <"EXEC LO", 126>; def EXEC_HI : SIReg <"EXEC HI", 127>; def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>; def SCC : SIReg<"SCC", 253>; -def SREG_LIT_0 : SIReg <"S LIT 0", 128>; -def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>; def M0 : SIReg <"M0", 124>; //Interpolation registers @@ -136,7 +134,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI) + (add SGPR_32, M0, EXEC_LO, EXEC_HI) >; def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>; -- cgit v1.1 From e9ba1830df2efef3da113a740909195e839ebd36 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:28:30 +0000 Subject: R600/SI: nuke SReg_1 v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's completely unnecessary and can be replaced with proper SReg_64 handling instead. This actually fixes a piglit test on SI.
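The mechanical core of the change, condensed from the diff below (both the before and after lines here are taken from the patch itself, shown side by side for contrast): i1 values are simply carried in 64-bit SGPR pairs, one bit per lane, so the custom VCC machinery can go away.

    // Before: i1 had dedicated 1-bit register classes, plus custom AND
    // lowering routed through the SIISD::VCC_AND / VCC_BITCAST nodes.
    addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
    addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
    setOperationAction(ISD::AND, MVT::i1, Custom);

    // After: i1 is just another type living in an SGPR pair, and a plain
    // TableGen pattern selects S_AND_B64 for (i1 (and ...)) directly.
    addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);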
v2: use correct register class in addRegisterClass, set special classes as not allocatable v3: revert setting special classes as not allocatable This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175355 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.h | 10 -------- lib/Target/R600/SIISelLowering.cpp | 40 +---------------------------- lib/Target/R600/SIISelLowering.h | 3 --- lib/Target/R600/SIInstrFormats.td | 5 +--- lib/Target/R600/SIInstrInfo.td | 27 ------------------- lib/Target/R600/SIInstructions.td | 50 ++++++++++++++---------------------- lib/Target/R600/SIRegisterInfo.td | 6 ++--- 7 files changed, 23 insertions(+), 118 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 4b844a3..99a11ff 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -135,16 +135,6 @@ enum { } // End namespace AMDGPUISD -namespace SIISD { - -enum { - SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, - VCC_AND, - VCC_BITCAST -}; - -} // End namespace SIISD - } // End namespace llvm #endif // AMDGPUISELLOWERING_H diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9a8ce80..4085890 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -31,8 +31,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); - addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); + addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); @@ -42,8 +41,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); - setOperationAction(ISD::AND, MVT::i1, Custom); - setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -202,7 +199,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); @@ -219,30 +215,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -/// \brief The function is for lowering i1 operations on the -/// VCC register. -/// -/// In the VALU context, VCC is a one bit register, but in the -/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only -/// the SALU can perform operations on the VCC register, we need to promote -/// the operand types from i1 to i64 in order for tablegen to be able to match -/// this operation to the correct SALU instruction. We do this promotion by -/// wrapping the operands in a CopyToReg node.
-/// -SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, - SelectionDAG &DAG, - unsigned VCCNode) const { - DebugLoc DL = Op.getDebugLoc(); - - SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, - DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, - Op.getOperand(0)), - DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, - Op.getOperand(1))); - - return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); -} - /// \brief Helper function for LowerBRCOND static SDNode *findUser(SDValue Value, unsigned Opcode) { @@ -446,13 +418,3 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, } return SDValue(); } - -#define NODE_NAME_CASE(node) case SIISD::node: return #node; - -const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); - NODE_NAME_CASE(VCC_AND) - NODE_NAME_CASE(VCC_BITCAST) - } -} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index f4bc94d..a8429b7 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -32,8 +32,6 @@ class SITargetLowering : public AMDGPUTargetLowering { void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, - unsigned VCCNode) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -45,7 +43,6 @@ public: virtual EVT getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual const char* getTargetNodeName(unsigned Opcode) const; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 5c69c15..40e37aa 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -39,9 +39,6 @@ class SOP2_32 op, string opName, list pattern> class SOP2_64 op, string opName, list pattern> : SOP2 ; -class SOP2_VCC op, string opName, list pattern> - : SOP2 ; - class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, string opName, list pattern> : VOP1 < @@ -101,7 +98,7 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, def _e32 : VOPC ; def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - (outs SReg_1:$dst), + (outs SReg_64:$dst), (ins arc:$src0, vrc:$src1, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index aa156f3..efc6015 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -8,36 +8,9 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// SI DAG Profiles -//===----------------------------------------------------------------------===// -def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ - SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> -]>; - -//===----------------------------------------------------------------------===// // SI DAG Nodes //===----------------------------------------------------------------------===// -// and operation on 64-bit wide vcc -def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, - [SDNPCommutative, SDNPAssociative] ->; - -// Special bitcast 
node for sharing VCC register between VALU and SALU -def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST", - SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> ->; - -// and operation on 64-bit wide vcc -def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, - [SDNPCommutative, SDNPAssociative] ->; - -// Special bitcast node for sharing VCC register between VALU and SALU -def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", - SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> ->; - // SMRD takes a 64bit memory address and can only add an 32bit offset def SIadd64bit32bit : SDNode<"ISD::ADD", SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 613fe13..7a83303 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -781,15 +781,15 @@ def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), } def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + (ins VReg_32:$src0, VReg_32:$src1, SReg_64:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), "V_CNDMASK_B32_e64", - [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))] + [(set (i32 VReg_32:$dst), (select (i1 SReg_64:$src2), VReg_32:$src1, VReg_32:$src0))] >; //f32 pattern for V_CNDMASK_B32_e64 def : Pat < - (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)), - (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2) + (f32 (select (i1 SReg_64:$src2), VReg_32:$src1, VReg_32:$src0)), + (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_64:$src2) >; defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; @@ -983,11 +983,14 @@ def : Pat < def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set SReg_64:$dst, (and SSrc_64:$src0, SSrc_64:$src1))] + [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))] >; -def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", - [(set SReg_1:$vcc, (SIvcc_and SSrc_64:$src0, SSrc_64:$src1))] + +def : Pat < + (i1 (and SSrc_64:$src0, SSrc_64:$src1)), + (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1) >; + def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; @@ -1069,9 +1072,9 @@ let isBranch = 1, isTerminator = 1 in { def SI_IF : InstSI < (outs SReg_64:$dst), - (ins SReg_1:$vcc, brtarget:$target), + (ins SReg_64:$vcc, brtarget:$target), "SI_IF", - [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))] + [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))] >; def SI_ELSE : InstSI < @@ -1101,9 +1104,9 @@ def SI_BREAK : InstSI < def SI_IF_BREAK : InstSI < (outs SReg_64:$dst), - (ins SReg_1:$vcc, SReg_64:$src), + (ins SReg_64:$vcc, SReg_64:$src), "SI_IF_BREAK", - [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))] + [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))] >; def SI_ELSE_BREAK : InstSI < @@ -1260,31 +1263,16 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; -def : Pat < - (i64 (SIsreg1_bitcast SReg_1:$vcc)), - (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64)) ->; - -def : Pat < - (i1 (SIsreg1_bitcast SReg_64:$vcc)), - (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1) ->; - -def : Pat < - (i64 (SIvcc_bitcast VCCReg:$vcc)), - (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64)) ->; - 
-def : Pat < - (i1 (SIvcc_bitcast SReg_64:$vcc)), - (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) ->; - /********** ================== **********/ /********** Immediate Patterns **********/ /********** ================== **********/ def : Pat < + (i1 imm:$imm), + (S_MOV_B64 imm:$imm) +>; + +def : Pat < (i32 imm:$imm), (V_MOV_B32_e32 imm:$imm) >; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 7f1fec0..ab36b87 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -137,9 +137,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add SGPR_32, M0, EXEC_LO, EXEC_HI) >; -def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>; - -def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>; +def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>; def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; @@ -178,7 +176,7 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; // [SV]Src_* operands can have either an immediate or a register def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def SSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64)>; +def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>; def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; -- cgit v1.1 From ecc571f063eb0d6a49033fbe85ffbf0db6114f59 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Sat, 16 Feb 2013 11:28:36 +0000 Subject: R600/SI: Add pattern to simplify i64 loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a candidate for the stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 4 ++++ lib/Target/R600/SIInstructions.td | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index efc6015..8c4e5af 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -40,6 +40,10 @@ def IMM12bit : ImmLeaf < [{return isUInt<12>(Imm);}] >; +class InlineImm : ImmLeaf ; + class InstSI pattern> : AMDGPUInst { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 7a83303..9372993 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1292,6 +1292,11 @@ def : Pat < (S_MOV_B32 fpimm:$imm) >; +def : Pat < + (i64 InlineImm:$imm), + (S_MOV_B64 InlineImm:$imm) +>; + // i64 immediates aren't supported in hardware, split it into two 32bit values def : Pat < (i64 imm:$imm), -- cgit v1.1 From 30fcfc39e2abdb7765c282e8c9d4c63c3d4f83b1 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 16 Feb 2013 13:34:26 +0000 Subject: Minor cleanups. No functionality change.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175359 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 02ac8bf..f0e0352 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2655,8 +2655,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. - return DAG.getNode(X86ISD::TC_RETURN, dl, - NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); @@ -4222,10 +4221,11 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { - return ((isa(Elt) && - cast(Elt)->isNullValue()) || - (isa(Elt) && - cast(Elt)->getValueAPF().isPosZero())); + if (ConstantSDNode *CN = dyn_cast(Elt)) + return CN->isNullValue(); + if (ConstantFPSDNode *CFP = dyn_cast(Elt)) + return CFP->getValueAPF().isPosZero(); + return false; } /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in @@ -15675,7 +15675,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, ConstantSDNode *CmpAgainst = 0; if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && (CmpAgainst = dyn_cast(Cond.getOperand(1))) && - dyn_cast(Cond.getOperand(0)) == 0) { + !isa(Cond.getOperand(0))) { if (CC == X86::COND_NE && CmpAgainst == dyn_cast(FalseOp)) { @@ -15955,8 +15955,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, if (VT == MVT::f32 || VT == MVT::f64) { bool ExpectingFlags = false; // Check for any users that want flags: - for (SDNode::use_iterator UI = N->use_begin(), - UE = N->use_end(); + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); !ExpectingFlags && UI != UE; ++UI) switch (UI->getOpcode()) { default: -- cgit v1.1 From 8a89cf233dcb3359c8649febefb4c38246f2acb4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 16 Feb 2013 15:00:16 +0000 Subject: MCParser: Reject .balign with non-pow2 alignments. GNU as rejects them and there are configure scripts in the wild that check if the assembler rejects ".align 3" to determine whether the alignment is in bytes or powers of two. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175360 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index b7953c1..9c998ff 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -2456,6 +2456,10 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } Alignment = 1ULL << Alignment; + } else { + // Reject alignments that aren't a power of two, for gas compatibility. + if (!isPowerOf2_64(Alignment)) + Error(AlignmentLoc, "alignment must be a power of 2"); } // Diagnose non-sensical max bytes to align. 
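To make the new behavior concrete, here is a hypothetical input file (the file name is invented; the diagnostic text is the one added above):

    # align.s -- hypothetical example
    .balign 8    # accepted: align to an 8-byte boundary
    .balign 3    # now rejected: "error: alignment must be a power of 2"

Previously the second directive was accepted silently; diagnosing it matches GNU as and keeps the configure-script probe described in the commit message meaningful.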
-- cgit v1.1 From c213366063fb3513884175d6d040568efbc1419f Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 16 Feb 2013 15:30:28 +0000 Subject: Target/R600/CMakeLists.txt: Prune SILowerLiteralConstants.cpp corresponding to r175354. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175361 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 4f74b04..00f8b10 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -45,7 +45,6 @@ add_llvm_target(R600CodeGen SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp - SILowerLiteralConstants.cpp SILowerControlFlow.cpp SIMachineFunctionInfo.cpp SIRegisterInfo.cpp -- cgit v1.1 From b717a5084722a5cad843444a8b1b4bf53f1c6325 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 16 Feb 2013 15:47:26 +0000 Subject: Use const reference instead of vector object when passing an argument to updateScheduledPressure method. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175362 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ddaf566..589fa1f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -487,7 +487,7 @@ void ScheduleDAGMI::initRegPressure() { // FIXME: When the pressure tracker deals in pressure differences then we won't // iterate over all RegionCriticalPSets[i]. void ScheduleDAGMI:: -updateScheduledPressure(std::vector NewMaxPressure) { +updateScheduledPressure(const std::vector &NewMaxPressure) { for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { unsigned ID = RegionCriticalPSets[i].PSetID; int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; -- cgit v1.1 From 3d6417d170fa832a9b1bf02cd5d9fc69cacd16cb Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 16 Feb 2013 16:04:29 +0000 Subject: [msvc x64] Update X86CompilationCallback_Win64.asm corresponding to r175267. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175363 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CompilationCallback_Win64.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm index f321778..69b4c71 100644 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ b/lib/Target/X86/X86CompilationCallback_Win64.asm @@ -11,7 +11,7 @@ ;; ;;===----------------------------------------------------------------------=== -extrn X86CompilationCallback2: PROC +extrn LLVMX86CompilationCallback2: PROC .code X86CompilationCallback proc @@ -42,7 +42,7 @@ X86CompilationCallback proc ; Pass prev frame and return address. mov rcx, rbp mov rdx, qword ptr [rbp+8] - call X86CompilationCallback2 + call LLVMX86CompilationCallback2 ; Restore all XMM arg registers. movaps xmm3, [rsp+48+32] -- cgit v1.1 From 71d6a79ada4de2178fce9fae2aa5627f0c268f56 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 16 Feb 2013 16:08:15 +0000 Subject: Reduce indents in LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode method. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175364 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 134 +++++++++++++-------------- 1 file changed, 67 insertions(+), 67 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5847dfe..4e4cb86 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3850,83 +3850,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { /// for expressions like A, A+1, A+2, etc., allocate a single register for /// them. void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { - if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { - DEBUG(dbgs() << "The search space is too complex.\n"); + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; - DEBUG(dbgs() << "Narrowing the search space by assuming that uses " - "separated by a constant offset will use the same " - "registers.\n"); + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by assuming that uses separated " + "by a constant offset will use the same registers.\n"); - // This is especially useful for unrolled loops. + // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { - LSRUse &LU = Uses[LUIdx]; - for (SmallVectorImpl::const_iterator I = LU.Formulae.begin(), - E = LU.Formulae.end(); I != E; ++I) { - const Formula &F = *I; - if (F.BaseOffset != 0 && F.Scale == 0) { - if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { - if (reconcileNewOffset(*LUThatHas, F.BaseOffset, - /*HasBaseReg=*/false, - LU.Kind, LU.AccessTy)) { - DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); - dbgs() << '\n'); - - LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - - // Update the relocs to reference the new use. - for (SmallVectorImpl::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.BaseOffset; - // Add the new offset to LUThatHas' offset list. - if (LUThatHas->Offsets.back() != Fixup.Offset) { - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseOffset == 0 || F.Scale != 0) + continue; - // Delete formulae from the new use which are no longer legal. - bool Any = false; - for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { - Formula &F = LUThatHas->Formulae[i]; - if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, F)) { - DEBUG(dbgs() << " Deleting "; F.print(dbgs()); - dbgs() << '\n'); - LUThatHas->DeleteFormula(F); - --i; - --e; - Any = true; - } - } - if (Any) - LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); + if (!LUThatHas) + continue; - // Delete the old use. 
- DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; - break; - } + if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, + LU.Kind, LU.AccessTy)) + continue; + + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Update the relocs to reference the new use. + for (SmallVectorImpl::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.BaseOffset; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; } + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; } - } - DEBUG(dbgs() << "After pre-selection:\n"; - print_uses(dbgs())); + // Delete formulae from the new use which are no longer legal. + bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, F)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Delete the old use. + DeleteUse(LU, LUIdx); + --LUIdx; + --NumUses; + break; + } } + + DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } /// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call -- cgit v1.1 From 631dd86804c3e74652d555b82ffe2f4750c81d68 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sat, 16 Feb 2013 16:15:42 +0000 Subject: LegalizeDAG.cpp doesn't need DenseMap. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175365 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 4a0176b..a9d40d0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" -- cgit v1.1 From 81474e98e10565e2ee0ad257ddc9469217520711 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 16 Feb 2013 17:06:32 +0000 Subject: Replace loop with std::find. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175366 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAG.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 70ad949..45b4f68 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -135,20 +135,14 @@ void SUnit::removePred(const SDep &D) { for (SmallVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { - bool FoundSucc = false; // Find the corresponding successor in N. 
SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - for (SmallVector::iterator II = N->Succs.begin(), - EE = N->Succs.end(); II != EE; ++II) - if (*II == P) { - FoundSucc = true; - N->Succs.erase(II); - break; - } - assert(FoundSucc && "Mismatching preds / succs lists!"); - (void)FoundSucc; + SmallVectorImpl::iterator Succ = std::find(N->Succs.begin(), + N->Succs.end(), P); + assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); + N->Succs.erase(Succ); Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { -- cgit v1.1 From eb4774a972af4bdd36d8795625c8c5d96ca507d1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 16 Feb 2013 17:06:38 +0000 Subject: Replace erase loop with std::remove_if. This avoids unnecessary copies. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175367 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegisterPressure.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 62e95aa..97f22e1 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -357,15 +357,14 @@ protected: /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, RegisterOperands &RegOpers) { - for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) + for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) RegOpers.collect(*OperI); // Remove redundant physreg dead defs. - for (unsigned i = RegOpers.DeadDefs.size(); i > 0; --i) { - unsigned Reg = RegOpers.DeadDefs[i-1]; - if (containsReg(RegOpers.Defs, Reg)) - RegOpers.DeadDefs.erase(&RegOpers.DeadDefs[i-1]); - } + SmallVectorImpl::iterator I = + std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), + std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); + RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); } /// Force liveness of registers. -- cgit v1.1 From 2de893210b0d4178edb4e3f2a965d57e97410341 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 16 Feb 2013 19:04:29 +0000 Subject: One more try to make this look nice. I have lots of pseudo lowering as well as 16/32 bit variants to do and so I want this to look nice when I do it. I've been experimenting with this. No new test cases are needed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175369 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 13 +++++++++---- lib/Target/Mips/Mips16InstrInfo.h | 3 +++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 19e00df..0612335 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -184,7 +184,7 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, int64_t Remainder = FrameSize - Base; BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). 
addImm(Base); if (isInt<16>(-Remainder)) - BuildMI(MBB, I, DL, AddiuSpImm(-Remainder)).addImm(-Remainder); + BuildAddiuSpImm(MBB, I, -Remainder); else adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); } @@ -225,7 +225,7 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; if (isInt<16>(Remainder)) - BuildMI(MBB, I, DL, AddiuSpImm(Remainder)).addImm(Remainder); + BuildAddiuSpImm(MBB, I, Remainder); else adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base); @@ -297,9 +297,8 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> - BuildMI(MBB, I, DL, AddiuSpImm(Amount)).addImm(Amount); + BuildAddiuSpImm(MBB, I, Amount); else adjustStackPtrBigUnrestricted(SP, Amount, MBB, I); } @@ -407,6 +406,12 @@ const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { return get(Mips::AddiuSpImmX16); } +void Mips16InstrInfo::BuildAddiuSpImm + (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const { + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm); +} + const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { return new Mips16InstrInfo(TM); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 0e8e87a..d197e7c 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -96,6 +96,9 @@ public: const MCInstrDesc& AddiuSpImm(int64_t Imm) const; + void BuildAddiuSpImm + (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const; + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; -- cgit v1.1 From c835b8c30127d15599de2d614434d39a6cc3ae17 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 16 Feb 2013 19:13:18 +0000 Subject: Turn the enum attributes DenseSet in AttrBuilder into a set of bits. Avoids malloc and is a lot denser. We lose iteration over target independent attributes, but that's a strange interface anyways and didn't have any users outside of AttrBuilder. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175370 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 629679c..99df5ff 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -355,8 +355,6 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { // FIXME: Remove this. switch (Val) { case Attribute::EndAttrKinds: - case Attribute::AttrKindEmptyKey: - case Attribute::AttrKindTombstoneKey: llvm_unreachable("Synthetic enumerators which should never get here"); case Attribute::None: return 0; @@ -597,8 +595,11 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { // Add target-independent attributes. 
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs; - for (AttrBuilder::iterator I = B.begin(), E = B.end(); I != E; ++I) { - Attribute::AttrKind Kind = *I; + for (Attribute::AttrKind Kind = Attribute::None; + Kind != Attribute::EndAttrKinds; ++Kind) { + if (!B.contains(Kind)) + continue; + if (Kind == Attribute::Alignment) Attrs.push_back(std::make_pair(Idx, Attribute:: getWithAlignment(C, B.getAlignment()))); @@ -907,7 +908,7 @@ void AttributeSet::dump() const { //===----------------------------------------------------------------------===// AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) - : Alignment(0), StackAlignment(0) { + : Attrs(0), Alignment(0), StackAlignment(0) { AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; @@ -923,14 +924,16 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) } void AttrBuilder::clear() { - Attrs.clear(); + Attrs = 0; Alignment = StackAlignment = 0; } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { + assert((unsigned)Val < 64 && Val < Attribute::EndAttrKinds && + "Attribute out of range!"); assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment && "Adding alignment attribute without adding alignment value!"); - Attrs.insert(Val); + Attrs |= 1ULL << Val; return *this; } @@ -941,7 +944,7 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { } Attribute::AttrKind Kind = Attr.getKindAsEnum(); - Attrs.insert(Kind); + Attrs |= 1ULL << Kind; if (Kind == Attribute::Alignment) Alignment = Attr.getAlignment(); @@ -956,7 +959,9 @@ AttrBuilder &AttrBuilder::addAttribute(StringRef A, StringRef V) { } AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { - Attrs.erase(Val); + assert((unsigned)Val < 64 && Val < Attribute::EndAttrKinds && + "Attribute out of range!"); + Attrs &= ~(1ULL << Val); if (Val == Attribute::Alignment) Alignment = 0; @@ -980,7 +985,7 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { Attribute::AttrKind Kind = I->getKindAsEnum(); - Attrs.erase(Kind); + Attrs &= ~(1ULL << Kind); if (Kind == Attribute::Alignment) Alignment = 0; @@ -1011,7 +1016,7 @@ AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x40000000 && "Alignment too large."); - Attrs.insert(Attribute::Alignment); + Attrs |= 1ULL << Attribute::Alignment; Alignment = Align; return *this; } @@ -1023,7 +1028,7 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x100 && "Alignment too large."); - Attrs.insert(Attribute::StackAlignment); + Attrs |= 1ULL << Attribute::StackAlignment; StackAlignment = Align; return *this; } @@ -1036,7 +1041,7 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { if (!StackAlignment) StackAlignment = B.StackAlignment; - Attrs.insert(B.Attrs.begin(), B.Attrs.end()); + Attrs |= B.Attrs; for (td_const_iterator I = B.TargetDepAttrs.begin(), E = B.TargetDepAttrs.end(); I != E; ++I) @@ -1045,16 +1050,12 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { return *this; } -bool AttrBuilder::contains(Attribute::AttrKind A) const { - return Attrs.count(A); -} - bool AttrBuilder::contains(StringRef A) const { return TargetDepAttrs.find(A) != TargetDepAttrs.end(); } bool AttrBuilder::hasAttributes() const { - return !Attrs.empty() || !TargetDepAttrs.empty(); + return Attrs != 0 || !TargetDepAttrs.empty(); } bool
AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { @@ -1071,7 +1072,7 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { - if (Attrs.count(I->getKindAsEnum())) + if (Attrs & (1ULL << I->getKindAsEnum())) return true; } else { assert(Attr.isStringAttribute() && "Invalid attribute kind!"); @@ -1087,10 +1088,8 @@ bool AttrBuilder::hasAlignmentAttr() const { } bool AttrBuilder::operator==(const AttrBuilder &B) { - for (DenseSet<Attribute::AttrKind>::iterator I = Attrs.begin(), - E = Attrs.end(); I != E; ++I) - if (!B.Attrs.count(*I)) - return false; + if (Attrs != B.Attrs) + return false; for (td_const_iterator I = TargetDepAttrs.begin(), E = TargetDepAttrs.end(); I != E; ++I) @@ -1107,7 +1106,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { - Attrs.insert(I); + Attrs |= 1ULL << I; if (I == Attribute::Alignment) Alignment = 1ULL << ((A >> 16) - 1); -- cgit v1.1 From b26f98fa1f098b527cc84ef7535fe84d89953c21 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Sat, 16 Feb 2013 19:14:59 +0000 Subject: Typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f47555c..8ce22e1 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -202,7 +202,7 @@ protected: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// \brief Reset the features for the X86 target. + /// \brief Reset the features for the ARM target. virtual void resetSubtargetFeatures(const MachineFunction *MF); private: void initializeEnvironment(); -- cgit v1.1 From 7c1461252b7ee8b3b5a556993cb7b96d793223b6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 16 Feb 2013 19:22:28 +0000 Subject: GCC doesn't like ++ on enums. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175373 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 99df5ff..77a1c82 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -596,7 +596,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { // Add target-independent attributes. SmallVector<std::pair<unsigned, Attribute>, 8> Attrs; for (Attribute::AttrKind Kind = Attribute::None; - Kind != Attribute::EndAttrKinds; ++Kind) { + Kind != Attribute::EndAttrKinds; Kind = Attribute::AttrKind(Kind + 1)) { if (!B.contains(Kind)) continue; -- cgit v1.1 From 5be5fa468acc4948b8f4d7f5e945d77a53e40bb8 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 16 Feb 2013 23:39:52 +0000 Subject: Clean up mips16 td file in preparation for massive pseudo lowering work.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175379 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 139 ++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 872de52..ea4bbe8 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -64,10 +64,9 @@ class F2RI16_ins<bits<5> _op, string asmstr, // // EXT-CCRR Instruction format // -class FEXT_CCRXI16_ins<bits<5> _op, string asmstr, - InstrItinClass itin>: - FEXT_RI16<_op, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm), - !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), [], itin> { +class FEXT_CCRXI16_ins<string asmstr>: + MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> { let isCodeGenOnly=1; } @@ -113,9 +112,9 @@ class FEXT_I816_SP_ins<bits<3> _func, string asmstr, // // CC-RR Instruction format // -class FCCRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry), !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), [], itin> { +class FCCRR16_ins<string asmstr> : + MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry), !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> { let isCodeGenOnly=1; } @@ -189,24 +188,22 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>: // // EXT-T8I8 // -class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2, - InstrItinClass itin>: - FEXT_I816<_func, (outs), - (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm), - !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t", - !strconcat(asmstr, "\t$imm"))),[], itin> { +class FEXT_T8I816_ins<string asmstr, string asmstr2>: + MipsPseudo16<(outs), + (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm), + !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t", + !strconcat(asmstr, "\t$imm"))),[]> { let isCodeGenOnly=1; } // // EXT-T8I8I // -class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2, - InstrItinClass itin>: - FEXT_I816<_func, (outs), - (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ), - !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t", - !strconcat(asmstr, "\t$targ"))), [], itin> { +class FEXT_T8I8I16_ins<string asmstr, string asmstr2>: + MipsPseudo16<(outs), + (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ), + !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t", + !strconcat(asmstr, "\t$targ"))), []> { let isCodeGenOnly=1; } // @@ -255,9 +252,9 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : !strconcat(asmstr, "\t$rx, $ry"), [], itin> { } -class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ; +class FRRTR16_ins<string asmstr> : + MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ; // // maybe refactor but need a $zero as a dummy first parameter @@ -332,11 +329,11 @@ class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> : // // So this pseudo class only has one operand, i.e.
op // -class Sel<bits<5> f1, string op, InstrItinClass itin>: - MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, - CPU16Regs:$rt), - !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin> { - let isCodeGenOnly=1; +class Sel<string op>: + MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, + CPU16Regs:$rt), + !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> { + //let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; } @@ -355,13 +352,12 @@ class Sel<bits<5> f1, string op, InstrItinClass itin>: // move $rd, $rs // // -class SeliT<bits<5> f1, string op1, bits<5> f2, string op2, - InstrItinClass itin>: - MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, - CPU16Regs:$rl, simm16:$imm), - !strconcat(op2, - !strconcat("\t$rl, $imm\n\t", - !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin> { +class SeliT<string op1, string op2>: + MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, + CPU16Regs:$rl, simm16:$imm), + !strconcat(op2, + !strconcat("\t$rl, $imm\n\t", + !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; } @@ -378,13 +374,13 @@ class SeliT<bits<5> f1, string op1, bits<5> f2, string op2, // move $rd, $rs // // -class SelT<bits<5> f1, string op1, bits<5> f2, string op2, - InstrItinClass itin>: - MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, +class SelT<string op1, string op2>: + MipsPseudo16<(outs CPU16Regs:$rd_), + (ins CPU16Regs:$rd, CPU16Regs:$rs, CPU16Regs:$rl, CPU16Regs:$rr), - !strconcat(op2, - !strconcat("\t$rl, $rr\n\t", - !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin> { + !strconcat(op2, + !strconcat("\t$rl, $rr\n\t", + !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; } @@ -434,6 +430,7 @@ class MayStore { } // + // Format: ADDIU rx, immediate MIPS16e // Purpose: Add Immediate Unsigned Word (2-Operand, Extended) // To add a constant to a 32-bit integer.
@@ -521,18 +518,18 @@ def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; // def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16; -def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16; +def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16; -def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>, +def BteqzT8CmpiX16: FEXT_T8I8I16_ins<"bteqz", "cmpi">, cbranch16; -def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16; +def BteqzT8SltX16: FEXT_T8I816_ins<"bteqz", "slt">, cbranch16; -def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16; +def BteqzT8SltuX16: FEXT_T8I816_ins<"bteqz", "sltu">, cbranch16; -def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16; +def BteqzT8SltiX16: FEXT_T8I8I16_ins<"bteqz", "slti">, cbranch16; -def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>, +def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">, cbranch16; // @@ -542,17 +539,17 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>, // def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16; -def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16; +def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16; -def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16; +def BtnezT8CmpiX16: FEXT_T8I8I16_ins<"btnez", "cmpi">, cbranch16; -def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16; +def BtnezT8SltX16: FEXT_T8I816_ins<"btnez", "slt">, cbranch16; -def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16; +def BtnezT8SltuX16: FEXT_T8I816_ins<"btnez", "sltu">, cbranch16; -def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16; +def BtnezT8SltiX16: FEXT_T8I8I16_ins<"btnez", "slti">, cbranch16; -def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>, +def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">, cbranch16; // @@ -847,7 +844,7 @@ def SbRxRyOffMemX16: // Purpose: if rt==0, do nothing // else rs = rt // -def SelBeqZ: Sel<0b00100, "beqz", IIAlu>; +def SelBeqZ: Sel<"beqz">; // // Format: SelTBteqZCmp rd, rs, rl, rr @@ -855,7 +852,7 @@ def SelBeqZ: Sel<0b00100, "beqz", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>; +def SelTBteqZCmp: SelT<"bteqz", "cmp">; // // Format: SelTBteqZCmpi rd, rs, rl, rr @@ -863,7 +860,7 @@ def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>; +def SelTBteqZCmpi: SeliT<"bteqz", "cmpi">; // // Format: SelTBteqZSlt rd, rs, rl, rr @@ -871,7 +868,7 @@ def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>; +def SelTBteqZSlt: SelT<"bteqz", "slt">; // // Format: SelTBteqZSlti rd, rs, rl, rr @@ -879,7 +876,7 @@ def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>; +def SelTBteqZSlti: SeliT<"bteqz", "slti">; // // Format: SelTBteqZSltu rd, rs, rl, rr @@ -887,7 +884,7 @@ def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>; // If b==0 then do nothing. 
// if b!=0 then rd = rs // -def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>; +def SelTBteqZSltu: SelT<"bteqz", "sltu">; // // Format: SelTBteqZSltiu rd, rs, rl, rr @@ -895,14 +892,14 @@ def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>; +def SelTBteqZSltiu: SeliT<"bteqz", "sltiu">; // // Format: SelBnez rd, rs, rt // Purpose: if rt!=0, do nothing // else rs = rt // -def SelBneZ: Sel<0b00101, "bnez", IIAlu>; +def SelBneZ: Sel<"bnez">; // // Format: SelTBtneZCmp rd, rs, rl, rr @@ -910,7 +907,7 @@ def SelBneZ: Sel<0b00101, "bnez", IIAlu>; // If b!=0 then do nothing. // if b0=0 then rd = rs // -def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>; +def SelTBtneZCmp: SelT<"btnez", "cmp">; // // Format: SelTBtnezCmpi rd, rs, rl, rr @@ -918,7 +915,7 @@ def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>; +def SelTBtneZCmpi: SeliT<"btnez", "cmpi">; // // Format: SelTBtneZSlt rd, rs, rl, rr @@ -926,7 +923,7 @@ def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>; +def SelTBtneZSlt: SelT<"btnez", "slt">; // // Format: SelTBtneZSlti rd, rs, rl, rr @@ -934,7 +931,7 @@ def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>; +def SelTBtneZSlti: SeliT<"btnez", "slti">; // // Format: SelTBtneZSltu rd, rs, rl, rr @@ -942,7 +939,7 @@ def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>; +def SelTBtneZSltu: SelT<"btnez", "sltu">; // // Format: SelTBtneZSltiu rd, rs, rl, rr @@ -950,7 +947,7 @@ def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>; +def SelTBtneZSltiu: SeliT<"btnez", "sltiu">; // // // Format: SH ry, offset(rx) MIPS16e @@ -979,14 +976,14 @@ def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; // Purpose: Set on Less Than Immediate (Extended) // To record the result of a less-than comparison with a constant. // -def SltiCCRxImmX16: FEXT_CCRXI16_ins<0b01010, "slti", IIAlu>; +def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">; // // Format: SLTIU rx, immediate MIPS16e // Purpose: Set on Less Than Immediate Unsigned (Extended) // To record the result of a less-than comparison with a constant. // -def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>; +def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">; // // Format: SLT rx, ry MIPS16e @@ -995,18 +992,18 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>; // def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>; -def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>; +def SltCCRxRy16: FCCRR16_ins<"slt">; // Format: SLTU rx, ry MIPS16e // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison. 
// -def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> { +def SltuRxRyRz16: FRRTR16_ins<"sltu"> { let isCodeGenOnly=1; } -def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>; +def SltuCCRxRy16: FCCRR16_ins<"sltu">; // // Format: SRAV ry, rx MIPS16e // Purpose: Shift Word Right Arithmetic Variable -- cgit v1.1 From cc54889cd58322b8766525f43cc1f7cb52e4692e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 16 Feb 2013 23:41:36 +0000 Subject: The transform is: (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) By the time the OR is visited, both the SELECTs have been visited and not optimized and the OR itself hasn't been transformed so we do this transform in the hopes that the new ORs will be optimized. The transform is explicitly disabled for vector-selects until "codegen matures to handle them better". Patch by Muhammad Tauqir! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175380 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index bf065fe..4332467 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2071,6 +2071,20 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Inner, C1); } + // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) + // Since this OR statement hasn't been optimized further yet, we hope + // that this transformation will allow the new ORs to be optimized. + { + Value *X = 0, *Y = 0; + if (Op0->hasOneUse() && Op1->hasOneUse() && + match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && + match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { + Value *orTrue = Builder->CreateOr(A, C); + Value *orFalse = Builder->CreateOr(B, D); + return SelectInst::Create(X, orTrue, orFalse); + } + } + return Changed ? &I : 0; } -- cgit v1.1 From f0b2535344e8c9e2912da78010918a44c5a18cab Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 17 Feb 2013 00:10:44 +0000 Subject: Add support for updating the LiveIntervals of registers used by 'exotic' terminators that actually have register uses when splitting critical edges. This commit also introduces a method repairIntervalsInRange() on LiveIntervals, which allows for repairing LiveIntervals in a small range after an arbitrary target hook modifies, inserts, and removes instructions. It's pretty limited right now, but I hope to extend it to support all of the things that are done by the convertToThreeAddress() target hooks. 
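The intended usage pattern, roughly (an illustrative sketch only, using the names from the SplitCriticalEdge() change in this patch; this is not yet a stable interface):

    // Before editing, record every register the affected instructions use.
    SmallVector<unsigned, 8> UsedRegs;
    for (MachineBasicBlock::instr_iterator I = MBB->getFirstInstrTerminator(),
         E = MBB->instr_end(); I != E; ++I)
      for (MachineInstr::mop_iterator OI = I->operands_begin(),
           OE = I->operands_end(); OI != OE; ++OI)
        if (OI->isReg() && OI->getReg() &&
            std::find(UsedRegs.begin(), UsedRegs.end(), OI->getReg()) ==
              UsedRegs.end())
          UsedRegs.push_back(OI->getReg());

    // ... a hook inserts, removes, or modifies instructions in the range ...

    // Afterwards, repair only the affected range rather than recomputing
    // every live interval from scratch.
    LIS->repairIntervalsInRange(MBB, RBegin, REnd, UsedRegs);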
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175382 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 36 ++++++++++++++++++++++++++++++++++++ lib/CodeGen/MachineBasicBlock.cpp | 29 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 0d4ec11..3c67be5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1032,3 +1032,39 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); HME.updateAllRanges(MI); } + +void +LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, + MachineBasicBlock::reverse_iterator RBegin, + MachineBasicBlock::reverse_iterator REnd, + SmallVectorImpl<unsigned> &OrigRegs) { + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { + unsigned Reg = OrigRegs[i]; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + LiveInterval &LI = getInterval(Reg); + SlotIndex startIdx = (REnd == MBB->rend()) ? getMBBStartIdx(MBB) + : getInstructionIndex(&*REnd); + for (MachineBasicBlock::reverse_iterator I = RBegin; I != REnd; ++I) { + MachineInstr *MI = &*I; + SlotIndex instrIdx = getInstructionIndex(MI); + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + assert(MO.isUse() && "Register defs are not yet supported."); + + if (!LI.liveAt(instrIdx)) { + LiveRange *LR = LI.getLiveRangeContaining(startIdx.getRegSlot()); + assert(LR && "Used registers must be live-in."); + LR->end = instrIdx.getRegSlot(); + break; + } + } + } + } +} diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index df1c7c2..3d75436 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -698,6 +698,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } } + SmallVector<unsigned, 8> UsedRegs; + if (LIS) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = I; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0) + continue; + + unsigned Reg = OI->getReg(); + if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + UsedRegs.push_back(Reg); + } + } + } + ReplaceUsesOfBlockWith(Succ, NMBB); // If updateTerminator() removes instructions, we need to remove them from @@ -830,6 +848,17 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LI.removeRange(StartIndex, EndIndex); } } + + // Update all intervals for registers whose uses may have been modified by + // updateTerminator(). + iterator FirstTerminator = getFirstTerminator(); + MachineInstr *FirstTerminatorMI = FirstTerminator; + if (FirstTerminatorMI->isBundled()) + FirstTerminatorMI = getBundleStart(FirstTerminatorMI); + reverse_iterator PreTerminators = + (FirstTerminator == begin()) ?
rend() + : reverse_iterator(FirstTerminatorMI); + LIS->repairIntervalsInRange(this, rbegin(), PreTerminators, UsedRegs); } if (MachineDominatorTree *MDT = -- cgit v1.1 From 0c222835982bae5e4831e16090f6ce594ef541a6 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 17 Feb 2013 01:45:04 +0000 Subject: Fix a conversion from a forward iterator to a reverse iterator in MachineBasicBlock::SplitCriticalEdge. Since this is an iterator rather than an instr_iterator, the isBundled() check only passes if getFirstTerminator() returned end() and the garbage memory happens to lean that way. Multiple successors can be present without any terminator instructions in the case of exception handling with a fallthrough. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175383 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBasicBlock.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 3d75436..f22a707 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -852,12 +852,13 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Update all intervals for registers whose uses may have been modified by // updateTerminator(). iterator FirstTerminator = getFirstTerminator(); - MachineInstr *FirstTerminatorMI = FirstTerminator; - if (FirstTerminatorMI->isBundled()) - FirstTerminatorMI = getBundleStart(FirstTerminatorMI); - reverse_iterator PreTerminators = - (FirstTerminator == begin()) ? rend() - : reverse_iterator(FirstTerminatorMI); + reverse_iterator PreTerminators; + if (FirstTerminator == begin()) + PreTerminators = rend(); + else if (FirstTerminator == end()) + PreTerminators = rbegin(); + else + PreTerminators = reverse_iterator(FirstTerminator); LIS->repairIntervalsInRange(this, rbegin(), PreTerminators, UsedRegs); -- cgit v1.1 From 7324d4e593ee2611ee6b272c03b15541fe2df62e Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 17 Feb 2013 03:48:23 +0000 Subject: Use ArrayRef instead of a reference to a SmallVectorImpl. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175385 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3c67be5..e09ac4b 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1037,7 +1037,7 @@ void LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::reverse_iterator RBegin, MachineBasicBlock::reverse_iterator REnd, - SmallVectorImpl<unsigned> &OrigRegs) { + ArrayRef<unsigned> OrigRegs) { -- cgit v1.1 From 680c98f6323dde0eae566710ea49497e16499653 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 17 Feb 2013 11:09:00 +0000 Subject: Remove use of reverse iterators in repairIntervalsInRange(). While they were arguably better than forward iterators for this use case, they are confusing and there are some implementation problems with reverse iterators and MI bundles.
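For reference, the replacement uses the standard idiom for walking a half-open range [Begin, End) backwards with plain forward iterators: decrement before dereferencing, so End itself is never dereferenced. A minimal self-contained illustration (plain C++, independent of LLVM):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> V;
      for (int i = 1; i <= 4; ++i)
        V.push_back(i);

      std::vector<int>::iterator Begin = V.begin(), I = V.end();
      while (I != Begin) {
        --I;                     // step back first...
        std::printf("%d\n", *I); // ...then use *I; prints 4 3 2 1
      }
      return 0;
    }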
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175393 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 19 ++++++++++++------- lib/CodeGen/MachineBasicBlock.cpp | 10 +--------- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index e09ac4b..0978d73 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1035,19 +1035,24 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, void LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, - MachineBasicBlock::reverse_iterator RBegin, - MachineBasicBlock::reverse_iterator REnd, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, ArrayRef<unsigned> OrigRegs) { + SlotIndex startIdx; + if (Begin == MBB->begin()) + startIdx = getMBBStartIdx(MBB); + else + startIdx = getInstructionIndex(prior(Begin)).getRegSlot(); + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { unsigned Reg = OrigRegs[i]; if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; LiveInterval &LI = getInterval(Reg); - SlotIndex startIdx = (REnd == MBB->rend()) ? getMBBStartIdx(MBB) - : getInstructionIndex(&*REnd); - for (MachineBasicBlock::reverse_iterator I = RBegin; I != REnd; ++I) { - MachineInstr *MI = &*I; + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; SlotIndex instrIdx = getInstructionIndex(MI); for (MachineInstr::mop_iterator OI = MI->operands_begin(), @@ -1059,7 +1064,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, assert(MO.isUse() && "Register defs are not yet supported."); if (!LI.liveAt(instrIdx)) { - LiveRange *LR = LI.getLiveRangeContaining(startIdx.getRegSlot()); + LiveRange *LR = LI.getLiveRangeContaining(startIdx); assert(LR && "Used registers must be live-in."); LR->end = instrIdx.getRegSlot(); break; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index f22a707..898e165 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -851,15 +851,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Update all intervals for registers whose uses may have been modified by // updateTerminator(). - iterator FirstTerminator = getFirstTerminator(); - reverse_iterator PreTerminators; - if (FirstTerminator == begin()) - PreTerminators = rend(); - else if (FirstTerminator == end()) - PreTerminators = rbegin(); - else - PreTerminators = reverse_iterator(FirstTerminator); - LIS->repairIntervalsInRange(this, rbegin(), PreTerminators, UsedRegs); + LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs); } if (MachineDominatorTree *MDT = -- cgit v1.1 From a79cbb12324db93236e06cc820f0e36ea1f7e4c4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 17 Feb 2013 14:30:32 +0000 Subject: Make the visibility of LLVMPPCCompilationCallback work with GCC. GCC warns about the attribute being ignored if it occurs after void*. There seems to be some kind of incompatibility between clang and gcc here, but I can't fathom who's right.
void* LLVM_LIBRARY_VISIBILITY foo(); // clang: hidden, gcc: default LLVM_LIBRARY_VISIBILITY void *bar(); // clang: hidden, gcc: hidden void LLVM_LIBRARY_VISIBILITY qux(); // clang: hidden, gcc: hidden git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175394 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 79d0c1f..cfcd749 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -292,7 +292,7 @@ void PPC64CompilationCallback() { #endif extern "C" { -void* LLVM_LIBRARY_VISIBILITY +LLVM_LIBRARY_VISIBILITY void * LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, unsigned *OrigCallAddrPlus4, bool is64Bit) { -- cgit v1.1 From f79f136cc64b0625b77c7b9008ed8c5b848b6b17 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 17 Feb 2013 15:59:26 +0000 Subject: BBVectorize: Fix an invalid reference bug This fixes PR15289. This bug was introduced (recently) in r175215; collecting all std::vector references for candidate pairs to delete at once is invalid because subsequent lookups in the owning DenseMap could invalidate the references. bugpoint was able to reduce a useful test case. Unfortunately, because whether or not this asserts depends on memory layout, this test case will sometimes appear to produce valid output. Nevertheless, running under valgrind will reveal the error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175397 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 1773cff..7636541 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -2164,10 +2164,7 @@ namespace { *S->second << "\n"); // Remove all candidate pairs that have values in the chosen dag. - std::vector<Value *> &KK = CandidatePairs[S->first], - &LL = CandidatePairs2[S->second], - &MM = CandidatePairs[S->second], - &NN = CandidatePairs2[S->first]; + std::vector<Value *> &KK = CandidatePairs[S->first]; for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end(); K != KE; ++K) { if (*K == S->second) continue; CandidatePairsSet.erase(ValuePair(S->first, *K)); } + + std::vector<Value *> &LL = CandidatePairs2[S->second]; for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end(); L != LE; ++L) { if (*L == S->first) continue; CandidatePairsSet.erase(ValuePair(*L, S->second)); } + + std::vector<Value *> &MM = CandidatePairs[S->second]; for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end(); M != ME; ++M) { assert(*M != S->first && "Flipped pair in candidate list?"); CandidatePairsSet.erase(ValuePair(S->second, *M)); } + + std::vector<Value *> &NN = CandidatePairs2[S->first]; for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end(); N != NE; ++N) { assert(*N != S->second && "Flipped pair in candidate list?"); -- cgit v1.1 From 906727dcfeb359acec4caca3ba8c756c4308ff6b Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Sun, 17 Feb 2013 16:35:51 +0000 Subject: Add multithreading functions and shutdown to the C API. Patch by Moritz Maxeiner.
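Example usage of the new entry points (a sketch only, assuming the declarations are exported from llvm-c/Core.h alongside the rest of the C API):

    #include "llvm-c/Core.h"

    int main(void) {
      /* Returns false if LLVM was built without threading support. */
      if (!LLVMStartMultithreaded())
        return 1;

      /* ... use the C API, possibly from several threads ... */

      if (LLVMIsMultithreaded())
        LLVMStopMultithreaded();

      /* Frees LLVM's ManagedStatic state, like llvm_shutdown() in C++. */
      LLVMShutdown();
      return 0;
    }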
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175398 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Core.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib') diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 79eb269..983b49c 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -26,9 +26,11 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" +#include "llvm/Support/Threading.h" #include <cassert> #include <cstdlib> #include <cstring> @@ -48,6 +50,10 @@ void LLVMInitializeCore(LLVMPassRegistryRef R) { initializeCore(*unwrap(R)); } +void LLVMShutdown() { + llvm_shutdown(); +} + /*===-- Error handling ----------------------------------------------------===*/ void LLVMDisposeMessage(char *Message) { @@ -2436,3 +2442,17 @@ LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { void LLVMDisposePassManager(LLVMPassManagerRef PM) { delete unwrap(PM); } + +/*===-- Threading ------------------------------------------------------===*/ + +LLVMBool LLVMStartMultithreaded() { + return llvm_start_multithreaded(); +} + +void LLVMStopMultithreaded() { + llvm_stop_multithreaded(); +} + +LLVMBool LLVMIsMultithreaded() { + return llvm_is_multithreaded(); +} -- cgit v1.1 From 9831bf06e8747206d27d480f06dedbf4a8605145 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 17 Feb 2013 17:55:32 +0000 Subject: AArch64: Avoid shifts by 64, that's undefined behavior. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175400 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 739ca95..cea7f91 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2512,7 +2512,7 @@ static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, N = N.getOperand(0); } else { // Mask is the whole width. - Mask = (1ULL << N.getValueType().getSizeInBits()) - 1; + Mask = -1ULL >> (64 - N.getValueType().getSizeInBits()); } if (N.getOpcode() == AArch64ISD::BFI) { @@ -2590,7 +2590,7 @@ static SDValue tryCombineToBFI(SDNode *N, DAG.getConstant(Width, MVT::i64)); // Mask is trivial - if ((LHSMask | RHSMask) == (1ULL << VT.getSizeInBits()) - 1) + if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits()))) return BFI; return DAG.getNode(ISD::AND, DL, VT, BFI, @@ -2660,7 +2660,7 @@ static SDValue tryCombineToLargerBFI(SDNode *N, BFI.getOperand(2), BFI.getOperand(3)); // If the masking is trivial, we don't need to create it. - if ((ExtraMask | ExistingMask) == (1ULL << VT.getSizeInBits()) - 1) + if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits()))) return BFI; return DAG.getNode(ISD::AND, DL, VT, BFI, -- cgit v1.1 From d61932bf844134d886b57e6730a5ae0831ebd115 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Sun, 17 Feb 2013 18:35:25 +0000 Subject: Return false instead of 0.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175402 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7f230ff..509095c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -726,7 +726,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // Don't handle popping bytes on return for now. if (X86MFInfo->getBytesToPopOnReturn() != 0) - return 0; + return false; // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. -- cgit v1.1 From cbe6c88b6811e4641629d111f941879982362fe8 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 17 Feb 2013 20:43:17 +0000 Subject: [XCore] Add missing u6 / lu6 instructions. These instructions are not targeted by the compiler but they are needed for the MC layer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175403 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index c23f874..1254062 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -603,6 +603,7 @@ defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">; let Defs = [SP], Uses = [SP] in { let neverHasSideEffects = 1 in defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">; + let mayStore = 1 in defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">; @@ -611,8 +612,12 @@ defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>; } } -// TODO extdp, kentsp, krestsp, blat -// getsr, kalli +let neverHasSideEffects = 1 in +defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">; + +let Uses = [R11], isCall=1 in +defm BLAT : FU6_LU6_np<0b0111001101, "blat">; + let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; @@ -632,6 +637,9 @@ let Defs = [R11], isReMaterializable = 1 in def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", [(set R11, ADDRcpii:$a)]>; +let Defs = [R11] in +defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">; + defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>; defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>; @@ -644,6 +652,14 @@ defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">; defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">; } +defm KCALL : FU6_LU6_np<0b0111001111, "kcall">; + +let Uses = [SP], Defs = [SP], mayStore = 1 in +defm KENTSP : FU6_LU6_np<0b0111101110, "kentsp">; + +let Uses = [SP], Defs = [SP], mayLoad = 1 in +defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">; + // U10 // TODO ldwcpl, blacp -- cgit v1.1 From a970dde9060d8994c242bd186bb3636d2caf22d2 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 17 Feb 2013 20:44:48 +0000 Subject: [XCore] Add missing u10 / lu10 instructions. These instructions are not targeted by the compiler but they are needed for the MC layer.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175404 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 1254062..38e2fbc 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -661,7 +661,6 @@ let Uses = [SP], Defs = [SP], mayLoad = 1 in defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">; // U10 -// TODO ldwcpl, blacp let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>; @@ -677,6 +676,10 @@ def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", let isCall=1, // All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { +def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; + +def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; + def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a", [(XCoreBranchLink immU10:$a)]>; @@ -684,6 +687,14 @@ def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a", [(XCoreBranchLink immU20:$a)]>; } +let Defs = [R11], mayLoad = 1, isReMaterializable = 1, + neverHasSideEffects = 1 in { +def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>; + +def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", + []>; +} + // Two operand short // TODO eet, eef, tsetmr def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b), -- cgit v1.1 From 763c858edeb76173ee4ef5ab9bf7d750db5d8c4f Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 17 Feb 2013 22:32:41 +0000 Subject: [XCore] Add TSETR instruction. This instruction is not targeted by the compiler but it is needed for the MC layer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175406 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 21 +++++++++++++++++++++ lib/Target/XCore/XCoreInstrFormats.td | 8 ++++++++ lib/Target/XCore/XCoreInstrInfo.td | 6 +++++- 3 files changed, 34 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index c995a9c..0bae15c 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -140,6 +140,11 @@ static DecodeStatus Decode3RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus Decode3RImmInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -313,6 +318,9 @@ Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, case 0x16: Inst.setOpcode(XCore::EQ_2rus); return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x17: + Inst.setOpcode(XCore::TSETR_3r); + return Decode3RImmInstruction(Inst, Insn, Address, Decoder); case 0x18: Inst.setOpcode(XCore::LSS_3r); return Decode3RInstruction(Inst, Insn, Address, Decoder); @@ -516,6 +524,19 @@ Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus +Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + Inst.addOperand(MCOperand::CreateImm(Op1)); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2, Op3; diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 8dceb30..057721e 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -39,6 +39,14 @@ class _F3R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern> let DecoderMethod = "Decode3RInstruction"; } +// 3R with first operand as an immediate. Used for TSETR where the first +// operand is treated as an immediate since it refers to a register number in +// another thread. +class _F3RImm<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : _F3R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "Decode3RImmInstruction"; +} + class _FL3R<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { let Inst{31-27} = opc{8-4}; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 38e2fbc..d366919 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -395,7 +395,11 @@ def STW_2rus : _F2RUS<0b0000, (outs), defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>; defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>; -// TODO tsetr + +// The first operand is treated as an immediate since it refers to a register +// number in another thread.
+def TSETR_3r : _F3RImm<0b10111, (outs), (ins i32imm:$a, GRRegs:$b, GRRegs:$c), + "set t[$c]:r$a, $b", []>; // Three operand long def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst), -- cgit v1.1 From 8dc741e400213ea8183e09626f0d1f45f14e044f Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sun, 17 Feb 2013 22:38:05 +0000 Subject: [XCore] Add missing 2r instructions. These instructions are not targeted by the compiler but they are needed for the MC layer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 18 ++++++++++++++++++ lib/Target/XCore/XCoreInstrFormats.td | 8 ++++++++ lib/Target/XCore/XCoreInstrInfo.td | 10 +++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 0bae15c..7e7d396 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -100,6 +100,11 @@ static DecodeStatus Decode2RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus Decode2RImmInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -345,6 +350,19 @@ Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus +Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + Inst.addOperand(MCOperand::CreateImm(Op1)); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + return S; +} + +static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 057721e..379cc39 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -158,6 +158,14 @@ class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> let DecoderMethod = "Decode2RInstruction"; } +// 2R with first operand as an immediate. Used for TSETMR where the first +// operand is treated as an immediate since it refers to a register number in +// another thread. +class _F2RImm<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : _F2R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "Decode2RImmInstruction"; +} + // 2R with first operand as both a source and a destination.
class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> { diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index d366919..e140ef2 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -700,7 +700,6 @@ def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", } // Two operand short -// TODO eet, eef, tsetmr def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b), "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; @@ -848,6 +847,15 @@ def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src), "endin $dst, res[$src]", [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; +def EEF_2r : _F2R<0b001011, (outs), (ins GRRegs:$a, GRRegs:$b), + "eef $a, res[$b]", []>; + +def EET_2r : _F2R<0b001001, (outs), (ins GRRegs:$a, GRRegs:$b), + "eet $a, res[$b]", []>; + +def TSETMR_2r : _F2RImm<0b000111, (outs), (ins i32imm:$a, GRRegs:$b), + "tsetmr r$a, $b", []>; // Two operand long def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", -- cgit v1.1 From 774ec7ba05606925966899d7dbe30453a3a2a877 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 17 Feb 2013 23:34:14 +0000 Subject: X86: Add a note. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175408 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README-SSE.txt | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 4011035..496b704 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -953,3 +953,12 @@ similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should turn into hsubpd also. //===---------------------------------------------------------------------===// + +define <2 x i32> @foo(<2 x double> %in) { + %x = fptosi <2 x double> %in to <2 x i32> + ret <2 x i32> %x } + +Should compile into cvttpd2dq instead of being scalarized into 2 cvttsd2si. + +//===---------------------------------------------------------------------===// -- cgit v1.1 From da4afa72f7cbe2801f3876eda33416aa3ba42987 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 18 Feb 2013 00:59:04 +0000 Subject: Beginning of expanding all current mips16 macro/pseudo instruction sequences. This expansion will be moved to expandISelPseudos as soon as I can figure out how to do that. Other instructions also use ExpandFEXT_T8I816_ins; once they have all been expanded, I will delete the macro asm string text so it can no longer be used.
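The expansion itself is mechanical: the compare writes the T8 condition register and the short branch tests it. A simplified sketch of the hook added below (see ExpandFEXT_T8I816_ins in the diff for the real version):

    // Pseudo operands are $rx, $ry and the branch target.
    unsigned regX = I->getOperand(0).getReg();
    unsigned regY = I->getOperand(1).getReg();
    MachineBasicBlock *target = I->getOperand(2).getMBB();
    // cmp $rx, $ry -- implicitly defines T8.
    BuildMI(MBB, I, I->getDebugLoc(), get(CmpOpc)).addReg(regX).addReg(regY);
    // bteqz/btnez target -- implicitly uses T8.
    BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target);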
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175413 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 15 +++++++++++++++ lib/Target/Mips/Mips16InstrInfo.h | 4 +++- lib/Target/Mips/Mips16InstrInfo.td | 18 ++++++++++++++++-- 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 0612335..721af26 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -135,6 +136,9 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch(MI->getDesc().getOpcode()) { default: return false; + case Mips::BtnezT8CmpX16: + ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); + break; case Mips::RetRA16: ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; @@ -399,6 +403,17 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); } + +void Mips16InstrInfo::ExpandFEXT_T8I816_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned BtOpc, unsigned CmpOpc) const { + unsigned regX = I->getOperand(0).getReg(); + unsigned regY = I->getOperand(1).getReg(); + MachineBasicBlock *target = I->getOperand(2).getMBB(); + BuildMI(MBB, I, I->getDebugLoc(), get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target); + +} const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { if (validSpImm8(Imm)) return get(Mips::AddiuSpImm16); diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index d197e7c..2e2ba9b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -115,7 +115,9 @@ private: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - + void ExpandFEXT_T8I816_ins(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned BtOpc, unsigned CmpOpc) const; }; } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index ea4bbe8..2ab7b47 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -516,7 +516,9 @@ def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; // Purpose: Branch on T Equal to Zero (Extended) // To test special register T then do a PC-relative conditional branch. // -def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16; +def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 { + let Uses = [T8]; +} def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16; @@ -537,7 +539,9 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">, // Purpose: Branch on T Not Equal to Zero (Extended) // To test special register T then do a PC-relative conditional branch. // -def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16; +def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 { + let Uses = [T8]; +} def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16; @@ -553,6 +557,16 @@ def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">, cbranch16; // +// Format: CMP rx, ry MIPS16e +// Purpose: Compare +// To compare the contents of two GPRs. 
+// +def CmpRxRy16: FRR16_ins<0b01010, "cmp", IIAlu> { + let Defs = [T8]; +} + + +// // Format: DIV rx, ry MIPS16e // Purpose: Divide Word // To divide 32-bit signed integers. -- cgit v1.1 From a8601bb4ffc5a3d7668cfadcd884e5400c526231 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 18 Feb 2013 03:06:29 +0000 Subject: Expand macro/pseudo BteqzT8CmpX16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175416 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 721af26..f2d4f06 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -136,6 +136,9 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch(MI->getDesc().getOpcode()) { default: return false; + case Mips::BteqzT8CmpX16: + ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::CmpRxRy16); + break; case Mips::BtnezT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); break; -- cgit v1.1 From dabfebb5c61e49ab23c5828953506d965bcf7401 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 18 Feb 2013 04:04:26 +0000 Subject: Expand pseudo/macro BteqzT8SltX16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175417 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index f2d4f06..ba1002e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -139,6 +139,9 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BteqzT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::CmpRxRy16); break; + case Mips::BteqzT8SltX16: + ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltRxRy16); + break; case Mips::BtnezT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); break; -- cgit v1.1 From 139748f1c180d4f2d55f31b321e9cfe87b06eb64 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 18 Feb 2013 04:55:38 +0000 Subject: Expand pseudo/macro BteqzT8SltuX16 . There is no test case because at this time, llvm is generating a different but equivalent pattern that would lead to this instruction. I am trying to think of a way to get it to generate this. If I can't, I may just remove the pseudo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175419 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 5 +++++ lib/Target/Mips/Mips16InstrInfo.td | 2 ++ 2 files changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index ba1002e..e0277ff 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -142,6 +142,11 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BteqzT8SltX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltRxRy16); break; + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. 
+ ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltuRxRy16); + break; case Mips::BtnezT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); break; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 2ab7b47..c7adce3 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1012,6 +1012,8 @@ def SltCCRxRy16: FCCRR16_ins<"slt">; // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison. // +def SltuRxRy16: FRR16_ins<0b00011, "sltu", IIAlu>; + def SltuRxRyRz16: FRRTR16_ins<"sltu"> { let isCodeGenOnly=1; } -- cgit v1.1 From bb01b3cb936f110fc20700b4c4447e3e7214cab3 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 18 Feb 2013 05:43:03 +0000 Subject: Expand macro/pseudo instructions BtnezT8SltX16 and BtnezT8SltuX16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175420 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index e0277ff..ab1258a 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -150,6 +150,14 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BtnezT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); break; + case Mips::BtnezT8SltX16: + ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltRxRy16); + break; + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltuRxRy16); + break; case Mips::RetRA16: ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; -- cgit v1.1 From d67c5cab3b770b0709dcb05256aef51b35f3f113 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 18 Feb 2013 06:41:57 +0000 Subject: Narrow the return types of a few DIBuilder utility functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175421 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 76 +++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index df1a81f..af055c9 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -185,7 +185,7 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, /// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. -DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { +DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), @@ -199,12 +199,13 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FromTy }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createPointerType - Create debugging information entry for a pointer. -DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, - uint64_t AlignInBits, StringRef Name) { +DIDerivedType +DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, + uint64_t AlignInBits, StringRef Name) { // Pointer types are encoded in DIDerivedType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), @@ -218,10 +219,10 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags PointeeTy }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } -DIType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) { +DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) { // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), @@ -236,12 +237,12 @@ DIType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) { PointeeTy, Base }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createReferenceType - Create debugging information entry for a reference /// type. -DIType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { +DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { assert(RTy.Verify() && "Unable to create reference type"); // References are encoded in DIDerivedType format. Value *Elts[] = { @@ -256,12 +257,12 @@ DIType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags RTy }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createTypedef - Create debugging information entry for a typedef. -DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo, DIDescriptor Context) { +DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, + unsigned LineNo, DIDescriptor Context) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { @@ -276,7 +277,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags Ty }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createFriend - Create debugging information entry for a 'friend'. @@ -301,8 +302,8 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { /// createInheritance - Create debugging information entry to establish /// inheritance relationship between two types. -DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, - uint64_t BaseOffset, unsigned Flags) { +DIDerivedType DIBuilder::createInheritance( + DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { assert(Ty.Verify() && "Unable to create inheritance"); // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { @@ -317,15 +318,14 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), BaseTy }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createMemberType - Create debugging information entry for a member. 
-DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty) { +DIDerivedType DIBuilder::createMemberType( + DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, DIType Ty) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), @@ -339,7 +339,7 @@ DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), Ty }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createStaticMemberType - Create debugging information entry for a @@ -533,11 +533,10 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, } /// createUnionType - Create debugging information entry for an union. -DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, - DIFile File, - unsigned LineNumber, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Flags, - DIArray Elements, unsigned RunTimeLang) { +DICompositeType DIBuilder::createUnionType( + DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, + unsigned RunTimeLang) { // TAG_union_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_union_type), @@ -554,11 +553,12 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), Constant::getNullValue(Type::getInt32Ty(VMContext)) }; - return DIType(MDNode::get(VMContext, Elts)); + return DICompositeType(MDNode::get(VMContext, Elts)); } /// createSubroutineType - Create subroutine type. -DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { +DICompositeType +DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { // TAG_subroutine_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), @@ -575,17 +575,15 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), Constant::getNullValue(Type::getInt32Ty(VMContext)) }; - return DIType(MDNode::get(VMContext, Elts)); + return DICompositeType(MDNode::get(VMContext, Elts)); } /// createEnumerationType - Create debugging information entry for an /// enumeration. -DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - DIArray Elements, - DIType ClassType) { +DICompositeType DIBuilder::createEnumerationType( + DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, + DIType ClassType) { // TAG_enumeration_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), @@ -604,12 +602,12 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, }; MDNode *Node = MDNode::get(VMContext, Elts); AllEnumTypes.push_back(Node); - return DIType(Node); + return DICompositeType(Node); } /// createArrayType - Create debugging information entry for an array. 
-DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { +DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), @@ -626,7 +624,7 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), Constant::getNullValue(Type::getInt32Ty(VMContext)) }; - return DIType(MDNode::get(VMContext, Elts)); + return DICompositeType(MDNode::get(VMContext, Elts)); } /// createVectorType - Create debugging information entry for a vector. -- cgit v1.1 From 3ef669c7175f959c39b7652f10c2b6ed9d8f83ae Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 18 Feb 2013 07:06:48 +0000 Subject: [ms-inline asm] Fix undefined behavior to reset hasMSInlineAsm in advance of SelectAllBasicBlocks(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175422 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 39d3a5d..c1235a9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -372,6 +372,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); + MF->setHasMSInlineAsm(false); // FIXME: This affected hasFP(). SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be -- cgit v1.1 From 139f7e50688ce120b275ae368f563d03d9c18184 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 18 Feb 2013 07:10:22 +0000 Subject: DIBuilder: Add function and method definitions to the list of all subprograms Previously we seemed to be assuming that all functions were definitions and all methods were declarations. This may be consistent with how Clang uses DIBuilder but doesn't have to be true of all clients (such as DragonEgg). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175423 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index af055c9..2fe13c4 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -924,7 +924,8 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. - AllSubprograms.push_back(Node); + if (isDefinition) + AllSubprograms.push_back(Node); return DISubprogram(Node); } @@ -968,6 +969,8 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; MDNode *Node = MDNode::get(VMContext, Elts); + if (isDefinition) + AllSubprograms.push_back(Node); return DISubprogram(Node); } -- cgit v1.1 From d4f92fd0d1f22af553db08416b476241e4372d9a Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 18 Feb 2013 07:27:30 +0000 Subject: DIBuilder: Correct the null/0 type of trailing fields in struct debug info. Paired with an Clang commit so this may cause temporary build failures. 
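Aside -- stepping back to the DIBuilder return-type narrowing above, a hedged caller-side sketch of what it buys (DIB, IntTy, Scope, File, Line and Members are placeholder names, and the accessors are those of the contemporaneous DebugInfo.h, so treat this as a sketch rather than the committed API):

    // Derived/composite-specific queries no longer need unchecked casts.
    DIDerivedType PtrTy = DIB.createPointerType(IntTy, 64, 64, "intptr");
    assert(PtrTy.getTag() == dwarf::DW_TAG_pointer_type);

    DICompositeType UnionTy =
        DIB.createUnionType(Scope, "u", File, Line, 64, 64, 0, Members, 0);
    DIArray Fields = UnionTy.getTypeArray(); // composite-only accessor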
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 2fe13c4..16632a1 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -526,8 +526,8 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), + NULL, + NULL, }; return DIType(MDNode::get(VMContext, Elts)); } -- cgit v1.1 From 3f213e7b3a6829a154d4e8ceb7d8689b389bd5dc Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 18 Feb 2013 12:09:51 +0000 Subject: Futureproof AttrBuild if we ever have more than 64 attr enum values. Currently we're at 34. Bitset should compile into virtually the same code as uint64_t here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175437 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 77a1c82..d89ebc5 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -924,16 +924,15 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) } void AttrBuilder::clear() { - Attrs = 0; + Attrs.reset(); Alignment = StackAlignment = 0; } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { - assert((unsigned)Val < 64 && Val < Attribute::EndAttrKinds && - "Attribute out of range!"); + assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!"); assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment && "Adding alignment attribute without adding alignment value!"); - Attrs |= 1ULL << Val; + Attrs[Val] = true; return *this; } @@ -944,7 +943,7 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { } Attribute::AttrKind Kind = Attr.getKindAsEnum(); - Attrs |= 1ULL << Kind; + Attrs[Kind] = true; if (Kind == Attribute::Alignment) Alignment = Attr.getAlignment(); @@ -959,9 +958,8 @@ AttrBuilder &AttrBuilder::addAttribute(StringRef A, StringRef V) { } AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { - assert((unsigned)Val < 64 && Val < Attribute::EndAttrKinds && - "Attribute out of range!"); - Attrs &= ~(1ULL << Val); + assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!"); + Attrs[Val] = false; if (Val == Attribute::Alignment) Alignment = 0; @@ -985,7 +983,7 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { Attribute::AttrKind Kind = I->getKindAsEnum(); - Attrs &= ~(1ULL << Kind); + Attrs[Kind] = false; if (Kind == Attribute::Alignment) Alignment = 0; @@ -1016,7 +1014,7 @@ AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) { assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x40000000 && "Alignment too large."); - Attrs |= 1ULL << Attribute::Alignment; + Attrs[Attribute::Alignment] = true; Alignment = Align; return *this; } @@ -1028,7 +1026,7 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); assert(Align <= 0x100 && "Alignment too large."); - Attrs |= 1ULL << Attribute::StackAlignment; + 
Attrs[Attribute::StackAlignment] = true; StackAlignment = Align; return *this; } @@ -1055,7 +1053,7 @@ bool AttrBuilder::contains(StringRef A) const { } bool AttrBuilder::hasAttributes() const { - return Attrs != 0 || !TargetDepAttrs.empty(); + return !Attrs.none() || !TargetDepAttrs.empty(); } bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { @@ -1072,7 +1070,7 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { - if (Attrs & (1ULL << I->getKindAsEnum())) + if (Attrs[I->getKindAsEnum()]) return true; } else { assert(Attr.isStringAttribute() && "Invalid attribute kind!"); @@ -1106,7 +1104,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { - Attrs |= 1ULL << I; + Attrs[I] = true; if (I == Attribute::Alignment) Alignment = 1ULL << ((A >> 16) - 1); -- cgit v1.1 From 605ff6655b31033dde21e61416751847bd0ee201 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Mon, 18 Feb 2013 13:47:02 +0000 Subject: [asan] revert r175266 as it breaks code with packed structures. supporting long double will require a more general solution git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 5769e94..b97e342 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -639,7 +639,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) { Type *OrigTy = cast(OrigPtrTy)->getElementType(); assert(OrigTy->isSized()); - uint32_t TypeSize = TD->getTypeAllocSizeInBits(OrigTy); + uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { -- cgit v1.1 From 628f6d5820aeb00ac1c142c79c5b35c13836de45 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Feb 2013 13:48:09 +0000 Subject: R600: Increase number of ArrayBase Reg to 32 Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175443 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600RegisterInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 3812eb7..0718854 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -44,7 +44,7 @@ foreach Index = 0-127 in { } // Array Base Register holding input in FS -foreach Index = 448-464 in { +foreach Index = 448-480 in { def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; } @@ -66,7 +66,7 @@ def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; def AR_X : R600Reg<"AR.x", 0>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (sequence "ArrayBase%u", 448, 464))>; + (add (sequence "ArrayBase%u", 448, 480))>; // special registers for ALU src operands // const buffer reference, SRCx_SEL contains index def ALU_CONST : R600Reg<"CBuf", 0>; -- cgit v1.1 From bbbef49118809c6a8d424a9434a70c0fdc3a66d5 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Feb 
2013 14:11:19 +0000 Subject: R600: Support for TBO NOTE: This is a candidate for the Mesa stable branch. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 3 +- lib/Target/R600/R600Instructions.td | 54 ++++++++++++++++++++++ lib/Target/R600/R600Intrinsics.td | 2 + 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 2171f90..5a315cb 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -165,7 +165,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::VTX_READ_GLOBAL_8_eg: case AMDGPU::VTX_READ_GLOBAL_32_eg: case AMDGPU::VTX_READ_GLOBAL_128_eg: - case AMDGPU::TEX_VTX_CONSTBUF: { + case AMDGPU::TEX_VTX_CONSTBUF: + case AMDGPU::TEX_VTX_TEXBUF : { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index e495bea..f88d3fc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1708,6 +1708,60 @@ def TEX_VTX_CONSTBUF : // Inst{127-96} = 0; } +def TEX_VTX_TEXBUF: + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", + [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, +VTX_WORD1_GPR, VTX_WORD0 { + +let VC_INST = 0; +let FETCH_TYPE = 2; +let FETCH_WHOLE_QUAD = 0; +let SRC_REL = 0; +let SRC_SEL_X = 0; +let DST_REL = 0; +let USE_CONST_FIELDS = 1; +let NUM_FORMAT_ALL = 0; +let FORMAT_COMP_ALL = 0; +let SRF_MODE_ALL = 1; +let MEGA_FETCH_COUNT = 16; +let DST_SEL_X = 0; +let DST_SEL_Y = 1; +let DST_SEL_Z = 2; +let DST_SEL_W = 3; +let DATA_FORMAT = 0; + +let Inst{31-0} = Word0; +let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + + //===--------------------------------------------------------------------===// // Instructions support diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index b5e4f1e..dc8980a 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -16,6 +16,8 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_R600_interp_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_load_texbuf : + Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_R600_store_swizzle : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_stream_output : -- cgit v1.1 From e3111964a0902bc38440980b0915b189f829c395 
Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Feb 2013 14:11:28 +0000 Subject: R600/SI: Use MULADD_IEEE/V_MAD_F32 instruction for mad pattern git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 10 +++------- lib/Target/R600/AMDGPUISelLowering.h | 1 - lib/Target/R600/AMDILISelLowering.cpp | 3 ++- lib/Target/R600/AMDILInstrInfo.td | 1 - lib/Target/R600/AMDILIntrinsics.td | 10 ---------- lib/Target/R600/R600Instructions.td | 9 ++++++++- lib/Target/R600/SIInstructions.td | 4 ++-- 7 files changed, 15 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d0d23d6..0a33264 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -127,9 +127,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerIntrinsicLRP(Op, DAG); case AMDGPUIntrinsic::AMDIL_fraction: return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); - case AMDGPUIntrinsic::AMDIL_mad: - return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3)); case AMDGPUIntrinsic::AMDIL_max: return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, Op.getOperand(1)); SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, Op.getOperand(3)); - return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), - Op.getOperand(2), - OneSubAC); + return DAG.getNode(ISD::FADD, DL, VT, + DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), + OneSubAC); } /// \brief Generate Min/Max node @@ -393,7 +390,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; // AMDIL DAG nodes - NODE_NAME_CASE(MAD); NODE_NAME_CASE(CALL); NODE_NAME_CASE(UMUL); NODE_NAME_CASE(DIV_INF); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 99a11ff..404f620 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -108,7 +108,6 @@ namespace AMDGPUISD { enum { // AMDIL ISD Opcodes FIRST_NUMBER = ISD::BUILTIN_OP_END, - MAD, // 32bit Fused Multiply Add instruction CALL, // Function call based on a single integer UMUL, // 32bit unsigned multiplication DIV_INF, // Divide with infinity returned on zero divisor diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index 2e60adc..3480ac8 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); // float fr = mad(fqneg, fb, fa); - SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); + SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY, + DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa); // int iq = (int)fq; SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td index e969bbf..110f147 100644 --- a/lib/Target/R600/AMDILInstrInfo.td +++ b/lib/Target/R600/AMDILInstrInfo.td @@ -116,7 +116,6 @@ def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, //===--------------------------------------------------------------------===// // Floating point math functions def 
IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>; -def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>; //===----------------------------------------------------------------------===// // Integer functions diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td index 3f9e20f..6ec3559 100644 --- a/lib/Target/R600/AMDILIntrinsics.td +++ b/lib/Target/R600/AMDILIntrinsics.td @@ -92,12 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { TernaryIntInt; def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, BinaryIntInt; - def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, - TernaryIntInt; - def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">, - TernaryIntInt; - def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">, - TernaryIntFloat; def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, BinaryIntInt; def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, @@ -110,10 +104,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { BinaryIntInt; def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, BinaryIntInt; - def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">, - TernaryIntInt; - def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">, - TernaryIntInt; def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, BinaryIntInt; def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index f88d3fc..d24a363 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -949,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP < class MULADD_Common <bits<5> inst> : R600_3OP < inst, "MULADD", + [] +>; + +class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < + inst, "MULADD_IEEE", [(set (f32 R600_Reg32:$dst), - (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] + (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))] >; class CNDE_Common <bits<5> inst> : R600_3OP < @@ -1107,6 +1112,7 @@ let Predicates = [isR600] in { def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; def MULADD_r600 : MULADD_Common<0x10>; + def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>; def CNDE_r600 : CNDE_Common<0x18>; def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; @@ -1246,6 +1252,7 @@ let Predicates = [isEGorCayman] in { >; def MULADD_eg : MULADD_Common<0x14>; + def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; def ASHR_eg : ASHR_Common<0x15>; def LSHR_eg : LSHR_Common<0x16>; def LSHL_eg : LSHL_Common<0x17>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9372993..b4a263d 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1416,8 +1416,8 @@ def : Pat < /********** VOP3 Patterns **********/ /********** ================== **********/ -def : Pat <(f32 (IL_mad VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2)), - (V_MAD_LEGACY_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, +def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VReg_32:$src1), VReg_32:$src2)), + (V_MAD_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, 0, 0, 0, 0)>; /********** ================== **********/ -- cgit v1.1 From b875acda987650d1d734b8bc6e76283950529f84 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 18 Feb 2013 20:13:59 +0000 Subject: [ms-inline asm] Remove a redundant call to the setHasMSInlineAsm function.
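Aside on the R600/SI mad change above -- the DAG-level shape it standardizes on, shown with the same calls the lowering code now makes (DAG, DL, VT, A, B, C assumed in scope; a sketch, not quoted from the patch):

    // Build the generic multiply-add; no AMDGPUISD::MAD node and no
    // AMDIL mad intrinsic are involved anymore.
    SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, A, B);
    SDValue Mad = DAG.getNode(ISD::FADD, DL, VT, Mul, C);
    // Instruction selection then folds the fadd(fmul) pair via the
    // MULADD_IEEE (R600) and V_MAD_F32 (SI) patterns added above.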
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175456 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c1235a9..a598ec4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -372,7 +372,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); - MF->setHasMSInlineAsm(false); // FIXME: This affected hasFP(). + MF->setHasMSInlineAsm(false); SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be @@ -442,7 +442,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there are any calls in this machine function. - MF->setHasMSInlineAsm(false); MachineFrameInfo *MFI = MF->getFrameInfo(); for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { -- cgit v1.1 From 98fbe27ac8f0766ea94b89b8c03418131b72bea4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 18 Feb 2013 20:55:12 +0000 Subject: Support for HiPE-compatible code emission, patch by Yiannis Tsiouris. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175457 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PrologEpilogInserter.cpp | 8 ++ lib/Target/X86/X86FrameLowering.cpp | 160 +++++++++++++++++++++++++++++++++-- lib/Target/X86/X86FrameLowering.h | 2 + 3 files changed, 165 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 954613d..45e04a9 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -693,6 +693,14 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // space in small chunks instead of one large contiguous block. if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + TFI.adjustForHiPEPrologue(Fn); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 950fd39..eb9f865 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1387,16 +1387,25 @@ HasNestArgument(const MachineFunction *MF) { } -/// GetScratchRegister - Get a register for performing work in the segmented -/// stack prologue. Depending on platform and the properties of the function -/// either one or two registers will be needed. Set primary to true for -/// the first register, false for the second. +/// GetScratchRegister - Get a temp register for performing work in the +/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform +/// and the properties of the function either one or two registers will be +/// needed. Set primary to true for the first register, false for the second. 
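// (Aside, a compressed sketch rather than part of the patch: the HiPE
// prologue emitted by adjustForHiPEPrologue further below amounts to
//
//   stackCheck: temp = SP - MaxStack
//               if (temp >= *(P + SPLimitOffset)) goto body       // JAE
//   incStack:   call inc_stack_0                                  // grow
//               temp = SP - MaxStack
//               if (temp <= *(P + SPLimitOffset)) goto incStack   // JLE
//
// with P = RBP/EBP and SPLimitOffset = 0x90/0x4c on 64-/32-bit targets,
// exactly the constants chosen in the code that follows.)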
static unsigned GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) { + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + + // Erlang stuff. + if (CallingConvention == CallingConv::HiPE) { + if (Is64Bit) + return Primary ? X86::R14 : X86::R13; + else + return Primary ? X86::EBX : X86::EDI; + } + if (Is64Bit) return Primary ? X86::R11 : X86::R12; - CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || @@ -1603,3 +1612,144 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.verify(); #endif } + +// Erlang programs may need a special prologue to handle the stack size they +// might need at runtime. That is because Erlang/OTP does not implement a C +// stack but uses a custom implementation of hybrid stack/heap +// architecture. (for more information see Eric Stenman's Ph.D. thesis: +// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) +// +// +// CheckStack: +// temp0 = sp - MaxStack +// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +// OldStart: +// ... +// IncStack: +// call inc_stack # doubles the stack space +// temp0 = sp - MaxStack +// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { + const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize(); + const bool Is64Bit = STI.is64Bit(); + DebugLoc DL; + // HiPE-specific values + const unsigned HipeLeafWords = 24; + const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; + const unsigned Guaranteed = HipeLeafWords * SlotSize; + const unsigned CallerStkArity = + std::max(0, MF.getFunction()->arg_size() - CCRegisteredArgs); + unsigned MaxStack = + MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize; + + assert(ST->isTargetLinux() && + "HiPE prologue is only supported on Linux operating systems."); + + // Compute the largest caller's frame that is needed to fit the callees' + // frames. This 'MaxStack' is computed from: + // + // a) the fixed frame size, which is the space needed for all spilled temps, + // b) outgoing on-stack parameter areas, and + // c) the minimum stack space this function needs to make available for the + // functions it calls (a tunable ABI property). + if (MFI->hasCalls()) { + unsigned MoreStackForCalls = 0; + + for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); + MBBI != MBBE; ++MBBI) + for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end(); + MI != ME; ++MI) + if (MI->isCall()) { + // Get callee operand. + const MachineOperand &MO = MI->getOperand(0); + const Function *F; + + // Only take account of global function calls (no closures etc.). + if (!MO.isGlobal()) continue; + if (!(F = dyn_cast<Function>(MO.getGlobal()))) continue; + + // Do not update 'MaxStack' for primitive and built-in functions + // (encoded with names either starting with "erlang."/"bif_" or not + // having a ".", such as a simple <Module>.<Function>.<Arity>, or an + // "_", such as the BIF "suspend_0") as they are executed on another + // stack.
+ if ((F->getName().find("erlang.") != std::string::npos) || + (F->getName().find("bif_") != std::string::npos)) continue; + if (F->getName().find_first_of("._") == std::string::npos) + continue; + + const uint64_t CalleeStkArity = + std::max(0, F->arg_size() - CCRegisteredArgs); + MoreStackForCalls = std::max( + MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); + } + MaxStack += MoreStackForCalls; + } + + // If the stack frame needed is larger than the guaranteed then runtime checks + // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. + if (MaxStack > Guaranteed) { + MachineBasicBlock &prologueMBB = MF.front(); + MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); + + for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(), + E = prologueMBB.livein_end(); I != E; I++) { + stackCheckMBB->addLiveIn(*I); + incStackMBB->addLiveIn(*I); + } + + MF.push_front(incStackMBB); + MF.push_front(stackCheckMBB); + + unsigned ScratchReg, SPReg, PReg, SPLimitOffset; + unsigned LEAop, CMPop, CALLop; + if (Is64Bit) { + SPReg = X86::RSP; + PReg = X86::RBP; + LEAop = X86::LEA64r; + CMPop = X86::CMP64rm; + CALLop = X86::CALL64pcrel32; + SPLimitOffset = 0x90; + } else { + SPReg = X86::ESP; + PReg = X86::EBP; + LEAop = X86::LEA32r; + CMPop = X86::CMP32rm; + CALLop = X86::CALLpcrel32; + SPLimitOffset = 0x4c; + } + + ScratchReg = GetScratchRegister(Is64Bit, MF, true); + assert(!MF.getRegInfo().isLiveIn(ScratchReg) && + "HiPE prologue scratch register is live-in"); + + // Create new MBB for StackCheck: + addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), + SPReg, false, -MaxStack); + // SPLimitOffset is in a fixed heap location (pointed by BP). + addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) + .addReg(ScratchReg), PReg, false, SPLimitOffset); + BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB); + + // Create new MBB for IncStack: + BuildMI(incStackMBB, DL, TII.get(CALLop)). + addExternalSymbol("inc_stack_0"); + addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), + SPReg, false, -MaxStack); + addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) + .addReg(ScratchReg), PReg, false, SPLimitOffset); + BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB); + + stackCheckMBB->addSuccessor(&prologueMBB, 99); + stackCheckMBB->addSuccessor(incStackMBB, 1); + incStackMBB->addSuccessor(&prologueMBB, 99); + incStackMBB->addSuccessor(incStackMBB, 1); + } +#ifdef XDEBUG + MF.verify(); +#endif +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index dc515dc..c35d952 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -43,6 +43,8 @@ public: void adjustForSegmentedStacks(MachineFunction &MF) const; + void adjustForHiPEPrologue(MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; -- cgit v1.1 From 6228999d826c87ac90344356c2d123361dc1d648 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 18 Feb 2013 21:45:01 +0000 Subject: Fix a 32/64 bit incompatibility in the HiPE prologue generation. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175458 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index eb9f865..038c395 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1682,7 +1682,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { continue; const uint64_t CalleeStkArity = - std::max(0, F->arg_size() - CCRegisteredArgs); + std::max(0, F->arg_size() - CCRegisteredArgs); MoreStackForCalls = std::max( MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); } -- cgit v1.1 From 848c25ddfa8530fd9349bdf5ed8a8633f27eb388 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 18 Feb 2013 21:46:28 +0000 Subject: [fast-isel] Remove an invalid assert. If the memcpy has an odd length with an alignment of 2, this would incorrectly assert on the last 1 byte copy. rdar://13202135 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 022fe97..b545dbc 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2458,7 +2458,6 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, if (Len >= 2 && Alignment == 2) VT = MVT::i16; else { - assert (Alignment == 1 && "Expected an alignment of 1!"); VT = MVT::i8; } } -- cgit v1.1 From 20ea2bc391bf72480998f456494011636dc19fea Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 18 Feb 2013 22:20:16 +0000 Subject: Remove a useless assert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175463 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 509095c..4e5430d 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1377,7 +1377,6 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, else if (Len >= 2) VT = MVT::i16; else { - assert(Len == 1); VT = MVT::i8; } -- cgit v1.1 From 82f7815e6f7819223b5f311195bfc1d9645d4754 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 18 Feb 2013 23:08:49 +0000 Subject: X86FrameLowering.cpp: Fix a warning in -Asserts. 
[-Wunused-variable] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175464 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 038c395..a52c4d9 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1631,7 +1631,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); - const X86Subtarget *ST = &MF.getTarget().getSubtarget(); MachineFrameInfo *MFI = MF.getFrameInfo(); const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize(); const bool Is64Bit = STI.is64Bit(); @@ -1645,7 +1644,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { unsigned MaxStack = MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize; - assert(ST->isTargetLinux() && + assert(getTarget().getSubtarget()->ST->isTargetLinux() && "HiPE prologue is only supported on Linux operating systems."); // Compute the largest caller's frame that is needed to fit the callees' -- cgit v1.1 From a39058aaed4540fc37681cad728b99546595b2e8 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 18 Feb 2013 23:11:17 +0000 Subject: Use LLVM_DELETED_FUNCTION rather than '// do not implement' comments. Also removes some redundant DNI comments on function declarations already using the macro. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175466 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 4 ++-- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 4 ++-- lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 8d45198..d5d995b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -29,8 +29,8 @@ using namespace llvm; namespace { class AArch64MCCodeEmitter : public MCCodeEmitter { - AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCSubtargetInfo &STI; MCContext &Ctx; diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 5a315cb..d207160 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -38,8 +38,8 @@ using namespace llvm; namespace { class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { - R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT + R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; const MCSubtargetInfo &STI; diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 0d9f3d8..2bf8fb8 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -35,8 +35,8 @@ typedef union { } IntFloatUnion; class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { - SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT + SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; const MCSubtargetInfo &STI; -- cgit v1.1 From 9d7c53af30363038a0acb594201cdb1282510f7b Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 18 Feb 2013 23:15:21 +0000 Subject: X86FrameLowering.cpp: Fixup. Sorry for the breakage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index a52c4d9..c98c80d 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1644,7 +1644,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { unsigned MaxStack = MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize; - assert(getTarget().getSubtarget()->ST->isTargetLinux() && + assert(MF.getTarget().getSubtarget().isTargetLinux() && "HiPE prologue is only supported on Linux operating systems."); // Compute the largest caller's frame that is needed to fit the callees' -- cgit v1.1 From 2af5035a1d4124881c8fe3d46354341d01374fb7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 18 Feb 2013 23:17:16 +0000 Subject: Check to see if the 'no-builtin' attribute is set before simplifying a library call. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175470 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index cccf0a6..2f47174 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1889,6 +1889,9 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + Function *F = CI->getParent()->getParent(); + // We don't want to "optimize" if the function doesn't want builtins. + if (F->hasFnAttribute("no-builtin")) return 0; return Impl->optimizeCall(CI); } -- cgit v1.1 From 56f58ad0e415fcc390cdd4f891e6bf936f0dcf53 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Mon, 18 Feb 2013 23:18:22 +0000 Subject: Use array_pod_sort instead of std::sort. 
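Aside -- a minimal sketch of the swap this commit makes (its diff follows), assuming per SplitString's use below that AsmPieces holds StringRefs: array_pod_sort sorts trivially-copyable elements through one shared qsort-style routine instead of instantiating std::sort per element type, trading a little speed for code size.

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"

    // Same effect as std::sort(Pieces.begin(), Pieces.end()) for POD-like
    // element types, but with a single out-of-line qsort under the hood.
    void sortPieces(llvm::SmallVectorImpl<llvm::StringRef> &Pieces) {
      llvm::array_pod_sort(Pieces.begin(), Pieces.end());
    }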
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175472 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f0e0352..9ec0502 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17657,7 +17657,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); - std::sort(AsmPieces.begin(), AsmPieces.end()); + array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (AsmPieces.size() == 4 && AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && @@ -17675,7 +17675,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); - std::sort(AsmPieces.begin(), AsmPieces.end()); + array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (AsmPieces.size() == 4 && AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && -- cgit v1.1 From f80167520740cbd9b73ead4fa524533532c5538e Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Tue, 19 Feb 2013 00:20:58 +0000 Subject: Expand pseudos BteqzT8CmpiX16 and BtnezT8CmpiX16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175474 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 26 ++++++++++++++++++++++++++ lib/Target/Mips/Mips16InstrInfo.h | 5 +++++ lib/Target/Mips/Mips16InstrInfo.td | 27 +++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index ab1258a..b619439 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -139,6 +139,10 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BteqzT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::CmpRxRy16); break; + case Mips::BteqzT8CmpiX16: + ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, + Mips::CmpiRxImm16, Mips::CmpiRxImmX16); + break; case Mips::BteqzT8SltX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltRxRy16); break; @@ -150,6 +154,10 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BtnezT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); break; + case Mips::BtnezT8CmpiX16: + ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, + Mips::CmpiRxImm16, Mips::CmpiRxImmX16); + break; case Mips::BtnezT8SltX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltRxRy16); break; @@ -433,6 +441,24 @@ void Mips16InstrInfo::ExpandFEXT_T8I816_ins( BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target); } + +void Mips16InstrInfo::ExpandFEXT_T8I8I16_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc) const { + unsigned regX = I->getOperand(0).getReg(); + int64_t imm = I->getOperand(1).getImm(); + MachineBasicBlock *target = I->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(MBB, I, I->getDebugLoc(), get(CmpOpc)).addReg(regX).addImm(imm); + 
BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target); +} + const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { if (validSpImm8(Imm)) return get(Mips::AddiuSpImm16); diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 2e2ba9b..2699a1c 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -118,6 +118,11 @@ private: void ExpandFEXT_T8I816_ins(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned BtOpc, unsigned CmpOpc) const; + + void ExpandFEXT_T8I8I16_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc) const; + }; } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index c7adce3..1a5c30b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -51,6 +51,15 @@ class FI816_SP_ins _func, string asmstr, // +class FRI16_ins_base op, string asmstr, string asmstr2, + InstrItinClass itin>: + FRI16; + +class FRI16_ins op, string asmstr, + InstrItinClass itin>: + FRI16_ins_base; + class F2RI16_ins _op, string asmstr, InstrItinClass itin>: FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), @@ -565,6 +574,24 @@ def CmpRxRy16: FRR16_ins<0b01010, "cmp", IIAlu> { let Defs = [T8]; } +// +// Format: CMPI rx, immediate MIPS16e +// Purpose: Compare Immediate +// To compare a constant with the contents of a GPR. +// +def CmpiRxImm16: FRI16_ins<0b01110, "cmpi", IIAlu> { + let Defs = [T8]; +} + +// +// Format: CMPI rx, immediate MIPS16e +// Purpose: Compare Immediate (Extended) +// To compare a constant with the contents of a GPR. +// +def CmpiRxImmX16: FEXT_RI16_ins<0b01110, "cmpi", IIAlu> { + let Defs = [T8]; +} + // // Format: DIV rx, ry MIPS16e -- cgit v1.1 From df8a668e3f694abcbe55224050f2c2f242b24e1e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 19 Feb 2013 00:52:45 +0000 Subject: Temporarily revert r175470 for more review. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 2f47174..cccf0a6 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1889,9 +1889,6 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - Function *F = CI->getParent()->getParent(); - // We don't want to "optimize" if the function doesn't want builtins. - if (F->hasFnAttribute("no-builtin")) return 0; return Impl->optimizeCall(CI); } -- cgit v1.1 From 2878b7d7ab8dc4bf424690b58088e0b7d3ada49f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 19 Feb 2013 02:08:14 +0000 Subject: Remove some unused private fields from the AArch64MCCodeEmitter. These fields were only ever set in the constructor. The create method retains its consistent interface so that these bits can be re-threaded through the emitter if they're ever needed. This was found by the -Wunused-private-field Clang warning. 
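Aside -- the copy-suppression idiom running through these MCCodeEmitter cleanups, in standalone form (macro from llvm/Support/Compiler.h): LLVM_DELETED_FUNCTION expands to "= delete" under C++11, so accidental copies fail at compile time rather than at link time, which is all the old declare-but-never-define "// DO NOT IMPLEMENT" convention could offer.

    #include "llvm/Support/Compiler.h"

    class NonCopyable {
      NonCopyable(const NonCopyable &) LLVM_DELETED_FUNCTION;
      void operator=(const NonCopyable &) LLVM_DELETED_FUNCTION;
    public:
      NonCopyable() {}
    };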
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175482 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index d5d995b..756e037 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -31,15 +31,10 @@ namespace { class AArch64MCCodeEmitter : public MCCodeEmitter { AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; - const MCInstrInfo &MCII; - const MCSubtargetInfo &STI; MCContext &Ctx; public: - AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : MCII(mcii), STI(sti), Ctx(ctx) { - } + AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {} ~AArch64MCCodeEmitter() {} @@ -492,7 +487,7 @@ MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AArch64MCCodeEmitter(MCII, STI, Ctx); + return new AArch64MCCodeEmitter(Ctx); } void AArch64MCCodeEmitter:: -- cgit v1.1 From 252d798fc302ed78bc4b11e66a2382015a25c6e0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2013 03:06:17 +0000 Subject: Use a reference into the BlockLiveness DenseMap to avoid repeated hash lookups in collectMarkers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175484 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackColoring.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index bd0d809..0f1caf7 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -240,8 +240,11 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { BasicBlocks[*FI] = BasicBlockNumbering.size(); BasicBlockNumbering.push_back(*FI); - BlockLiveness[*FI].Begin.resize(NumSlot); - BlockLiveness[*FI].End.resize(NumSlot); + // Keep a reference to avoid repeated lookups. + BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI]; + + BlockInfo.Begin.resize(NumSlot); + BlockInfo.End.resize(NumSlot); for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); BI != BE; ++BI) { @@ -265,15 +268,15 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { } if (IsStart) { - BlockLiveness[*FI].Begin.set(Slot); + BlockInfo.Begin.set(Slot); } else { - if (BlockLiveness[*FI].Begin.test(Slot)) { + if (BlockInfo.Begin.test(Slot)) { // Allocas that start and end within a single block are handled // specially when computing the LiveIntervals to avoid pessimizing // the liveness propagation. - BlockLiveness[*FI].Begin.reset(Slot); + BlockInfo.Begin.reset(Slot); } else { - BlockLiveness[*FI].End.set(Slot); + BlockInfo.End.set(Slot); } } } -- cgit v1.1 From cbc6d797054a2bf2a641031f270d38804a6f2295 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2013 03:14:22 +0000 Subject: Make the dump() function const and reduce the number of hash lookups it performs. 
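Aside -- the pattern behind both StackColoring changes above, in a standalone toy form (key/value types simplified): every operator[] or find() re-hashes the key, so look it up once and reuse the reference, which stays valid as long as nothing inserts into the DenseMap while it is held.

    #include "llvm/ADT/DenseMap.h"

    void bumpThrice(llvm::DenseMap<int, int> &M, int Key) {
      int &V = M[Key]; // one hash lookup instead of three
      V += 1;
      V += 2;
      V += 3;
    }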
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175485 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackColoring.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 0f1caf7..7031355 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -144,7 +144,7 @@ public: private: /// Debug. - void dump(); + void dump() const; /// Removes all of the lifetime marker instructions from the function. /// \returns true if any markers were removed. @@ -199,30 +199,36 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -void StackColoring::dump() { +void StackColoring::dump() const { for (df_iterator FI = df_begin(MF), FE = df_end(MF); FI != FE; ++FI) { - DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n"); + + DenseMap::const_iterator BI = + BlockLiveness.find(*FI); + assert(BI != BlockLiveness.end() && "Block not found"); + const BlockLifetimeInfo &BlockInfo = BI->second; + DEBUG(dbgs()<<"BEGIN : {"); - for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) - DEBUG(dbgs()< Date: Tue, 19 Feb 2013 03:56:57 +0000 Subject: Expand pseudos/macros BteqzT8SltiX16, BteqzT8SltiuX16, BtnezT8SltiX16, BtnezT8SltiuX16 . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175486 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 17 ++++++++++++++++- lib/Target/Mips/Mips16InstrInfo.td | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index b619439..22cb963 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -132,7 +132,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MachineBasicBlock &MBB = *MI->getParent(); - switch(MI->getDesc().getOpcode()) { default: return false; @@ -146,11 +145,19 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BteqzT8SltX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltRxRy16); break; + case Mips::BteqzT8SltiX16: + ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, + Mips::SltiRxImm16, Mips::SltiRxImmX16); + break; case Mips::BteqzT8SltuX16: // TBD: figure out a way to get this or remove the instruction // altogether. ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltuRxRy16); break; + case Mips::BteqzT8SltiuX16: + ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, + Mips::SltiuRxImm16, Mips::SltiuRxImmX16); + break; case Mips::BtnezT8CmpX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); break; @@ -161,11 +168,19 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::BtnezT8SltX16: ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltRxRy16); break; + case Mips::BtnezT8SltiX16: + ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, + Mips::SltiRxImm16, Mips::SltiRxImmX16); + break; case Mips::BtnezT8SltuX16: // TBD: figure out a way to get this or remove the instruction // altogether. 
ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltuRxRy16); break; + case Mips::BtnezT8SltiuX16: + ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, + Mips::SltiuRxImm16, Mips::SltiuRxImmX16); + break; case Mips::RetRA16: ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 1a5c30b..0d90df4 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1012,13 +1012,45 @@ def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; // def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; +// Format: SLTI rx, immediate MIPS16e +// Purpose: Set on Less Than Immediate +// To record the result of a less-than comparison with a constant. +// +// +def SltiRxImm16: FRI16_ins<0b01010, "slti", IIAlu> { + let Defs = [T8]; +} + // // Format: SLTI rx, immediate MIPS16e // Purpose: Set on Less Than Immediate (Extended) // To record the result of a less-than comparison with a constant. // +// +def SltiRxImmX16: FEXT_RI16_ins<0b01010, "slti", IIAlu> { + let Defs = [T8]; +} + def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">; +// Format: SLTIU rx, immediate MIPS16e +// Purpose: Set on Less Than Immediate Unsigned +// To record the result of a less-than comparison with a constant. +// +// +def SltiuRxImm16: FRI16_ins<0b01011, "sltiu", IIAlu> { + let Defs = [T8]; +} + +// +// Format: SLTI rx, immediate MIPS16e +// Purpose: Set on Less Than Immediate Unsigned (Extended) +// To record the result of a less-than comparison with a constant. +// +// +def SltiuRxImmX16: FEXT_RI16_ins<0b01011, "sltiu", IIAlu> { + let Defs = [T8]; +} // // Format: SLTIU rx, immediate MIPS16e // Purpose: Set on Less Than Immediate Unsigned (Extended) -- cgit v1.1 From cede03886712e18b697f9ec91311d4a8df60c734 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2013 04:47:31 +0000 Subject: Avoid extra DenseMap lookups in StackColoring::calculateLocalLiveness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175487 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackColoring.cpp | 50 +++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 7031355..f3da088 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -316,30 +316,44 @@ void StackColoring::calculateLocalLiveness() { MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; + // Use an iterator to avoid repeated lookups. + DenseMap::iterator BI = + BlockLiveness.find(BB); + assert(BI != BlockLiveness.end() && "Block not found"); + BlockLifetimeInfo &BlockInfo = BI->second; + BitVector LocalLiveIn; BitVector LocalLiveOut; // Forward propagation from begins to ends. - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - PE = BB->pred_end(); PI != PE; ++PI) - LocalLiveIn |= BlockLiveness[*PI].LiveOut; - LocalLiveIn |= BlockLiveness[BB].End; - LocalLiveIn.reset(BlockLiveness[BB].Begin); + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) { + DenseMap::const_iterator I = + BlockLiveness.find(*PI); + assert(I != BlockLiveness.end() && "Predecessor not found"); + LocalLiveIn |= I->second.LiveOut; + } + LocalLiveIn |= BlockInfo.End; + LocalLiveIn.reset(BlockInfo.Begin); // Reverse propagation from ends to begins. 
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - LocalLiveOut |= BlockLiveness[*SI].LiveIn; - LocalLiveOut |= BlockLiveness[BB].Begin; - LocalLiveOut.reset(BlockLiveness[BB].End); + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + DenseMap::const_iterator I = + BlockLiveness.find(*SI); + assert(I != BlockLiveness.end() && "Successor not found"); + LocalLiveOut |= I->second.LiveIn; + } + LocalLiveOut |= BlockInfo.Begin; + LocalLiveOut.reset(BlockInfo.End); LocalLiveIn |= LocalLiveOut; LocalLiveOut |= LocalLiveIn; // After adopting the live bits, we need to turn-off the bits which // are de-activated in this block. - LocalLiveOut.reset(BlockLiveness[BB].End); - LocalLiveIn.reset(BlockLiveness[BB].Begin); + LocalLiveOut.reset(BlockInfo.End); + LocalLiveIn.reset(BlockInfo.Begin); // If we have both BEGIN and END markers in the same basic block then // we know that the BEGIN marker comes after the END, because we already @@ -348,23 +362,23 @@ void StackColoring::calculateLocalLiveness() { // Want to enable the LIVE_IN and LIVE_OUT of slots that have both // BEGIN and END because it means that the value lives before and after // this basic block. - BitVector LocalEndBegin = BlockLiveness[BB].End; - LocalEndBegin &= BlockLiveness[BB].Begin; + BitVector LocalEndBegin = BlockInfo.End; + LocalEndBegin &= BlockInfo.Begin; LocalLiveIn |= LocalEndBegin; LocalLiveOut |= LocalEndBegin; - if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) { + if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; - BlockLiveness[BB].LiveIn |= LocalLiveIn; + BlockInfo.LiveIn |= LocalLiveIn; for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) NextBBSet.insert(*PI); } - if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) { + if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; - BlockLiveness[BB].LiveOut |= LocalLiveOut; + BlockInfo.LiveOut |= LocalLiveOut; for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) -- cgit v1.1 From 04fbcb59432c085bb284501dcea9693f435a417b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2013 05:32:02 +0000 Subject: Const-correct the stack coloring code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175488 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackColoring.cpp | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index f3da088..6c5212b 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -102,12 +102,13 @@ class StackColoring : public MachineFunctionPass { }; /// Maps active slots (per bit) for each basic block. - DenseMap BlockLiveness; + typedef DenseMap LivenessMap; + LivenessMap BlockLiveness; /// Maps serial numbers to basic blocks. - DenseMap BasicBlocks; + DenseMap BasicBlocks; /// Maps basic blocks to a serial number. - SmallVector BasicBlockNumbering; + SmallVector BasicBlockNumbering; /// Maps liveness intervals for each slot. 
SmallVector Intervals; @@ -205,8 +206,7 @@ void StackColoring::dump() const { DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n"); - DenseMap::const_iterator BI = - BlockLiveness.find(*FI); + LivenessMap::const_iterator BI = BlockLiveness.find(*FI); assert(BI != BlockLiveness.end() && "Block not found"); const BlockLifetimeInfo &BlockInfo = BI->second; @@ -299,26 +299,25 @@ void StackColoring::calculateLocalLiveness() { // formulation, and END is equivalent to GEN. The result of this computation // is a map from blocks to bitvectors where the bitvectors represent which // allocas are live in/out of that block. - SmallPtrSet BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); + SmallPtrSet BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); unsigned NumSSMIters = 0; bool changed = true; while (changed) { changed = false; ++NumSSMIters; - SmallPtrSet NextBBSet; + SmallPtrSet NextBBSet; - for (SmallVector::iterator + for (SmallVector::iterator PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); PI != PE; ++PI) { - MachineBasicBlock *BB = *PI; + const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups. - DenseMap::iterator BI = - BlockLiveness.find(BB); + LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); BlockLifetimeInfo &BlockInfo = BI->second; @@ -328,8 +327,7 @@ void StackColoring::calculateLocalLiveness() { // Forward propagation from begins to ends. for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) { - DenseMap::const_iterator I = - BlockLiveness.find(*PI); + LivenessMap::const_iterator I = BlockLiveness.find(*PI); assert(I != BlockLiveness.end() && "Predecessor not found"); LocalLiveIn |= I->second.LiveOut; } @@ -339,8 +337,7 @@ void StackColoring::calculateLocalLiveness() { // Reverse propagation from ends to begins. for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - DenseMap::const_iterator I = - BlockLiveness.find(*SI); + LivenessMap::const_iterator I = BlockLiveness.find(*SI); assert(I != BlockLiveness.end() && "Successor not found"); LocalLiveOut |= I->second.LiveIn; } @@ -371,7 +368,7 @@ void StackColoring::calculateLocalLiveness() { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) NextBBSet.insert(*PI); } @@ -380,7 +377,7 @@ void StackColoring::calculateLocalLiveness() { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) NextBBSet.insert(*SI); } -- cgit v1.1 From 261abf5f4011e5b1e8949d7404190a4f4eaff8d8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2013 06:02:40 +0000 Subject: More const correcting of stack coloring. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackColoring.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 6c5212b..ec44b8c 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -262,7 +262,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { Markers.push_back(BI); bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; - MachineOperand &MI = BI->getOperand(0); + const MachineOperand &MI = BI->getOperand(0); unsigned Slot = MI.getIndex(); MarkersFound++; @@ -401,9 +401,9 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Finishes.resize(NumSlots); // Create the interval for the basic blocks with lifetime markers in them. - for (SmallVector::iterator it = Markers.begin(), + for (SmallVectorImpl::const_iterator it = Markers.begin(), e = Markers.end(); it != e; ++it) { - MachineInstr *MI = *it; + const MachineInstr *MI = *it; if (MI->getParent() != MBB) continue; @@ -412,7 +412,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { "Invalid Lifetime marker"); bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - MachineOperand &Mo = MI->getOperand(0); + const MachineOperand &Mo = MI->getOperand(0); int Slot = Mo.getIndex(); assert(Slot >= 0 && "Invalid slot"); @@ -499,7 +499,7 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { // Keep a list of *allocas* which need to be remapped. DenseMap Allocas; - for (DenseMap::iterator it = SlotRemap.begin(), + for (DenseMap::const_iterator it = SlotRemap.begin(), e = SlotRemap.end(); it != e; ++it) { const AllocaInst *From = MFI->getObjectAllocation(it->first); const AllocaInst *To = MFI->getObjectAllocation(it->second); @@ -594,8 +594,8 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { } void StackColoring::removeInvalidSlotRanges() { - MachineFunction::iterator BB, BBE; - MachineBasicBlock::iterator I, IE; + MachineFunction::const_iterator BB, BBE; + MachineBasicBlock::const_iterator I, IE; for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { @@ -614,7 +614,7 @@ void StackColoring::removeInvalidSlotRanges() { // Check all of the machine operands. for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - MachineOperand &MO = I->getOperand(i); + const MachineOperand &MO = I->getOperand(i); if (!MO.isFI()) continue; -- cgit v1.1 From ff3139fe53fdc391972bb0ff4d7bde6ced2f5d5a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 19 Feb 2013 07:43:59 +0000 Subject: Fix capitalization in comment to match function name. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175497 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9ec0502..9ed03cd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10210,7 +10210,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } -// getTargetVShiftNOde - Handle vector element shifts where the shift amount +// getTargetVShiftNode - Handle vector element shifts where the shift amount // may or may not be a constant. 
Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue SrcOp, SDValue ShAmt, -- cgit v1.1 From c61e83e6de778e5bd937e401564fe6bd0836b727 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 19 Feb 2013 09:48:30 +0000 Subject: Simplify code. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175501 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index b133502..942346b 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -26,34 +26,28 @@ void BitstreamCursor::operator=(const BitstreamCursor &RHS) { // Copy abbreviations, and bump ref counts. CurAbbrevs = RHS.CurAbbrevs; - for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); - i != e; ++i) + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) CurAbbrevs[i]->addRef(); // Copy block scope and bump ref counts. BlockScope = RHS.BlockScope; - for (unsigned S = 0, e = static_cast(BlockScope.size()); - S != e; ++S) { + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; - for (unsigned i = 0, e = static_cast(Abbrevs.size()); - i != e; ++i) + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) Abbrevs[i]->addRef(); } } void BitstreamCursor::freeState() { // Free all the Abbrevs. - for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); - i != e; ++i) + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) CurAbbrevs[i]->dropRef(); CurAbbrevs.clear(); // Free all the Abbrevs in the block scope. - for (unsigned S = 0, e = static_cast(BlockScope.size()); - S != e; ++S) { + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; - for (unsigned i = 0, e = static_cast(Abbrevs.size()); - i != e; ++i) + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) Abbrevs[i]->dropRef(); } BlockScope.clear(); @@ -69,8 +63,7 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { // Add the abbrevs specific to this block to the CurAbbrevs list. if (const BitstreamReader::BlockInfo *Info = BitStream->getBlockInfo(BlockID)) { - for (unsigned i = 0, e = static_cast(Info->Abbrevs.size()); - i != e; ++i) { + for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) { CurAbbrevs.push_back(Info->Abbrevs[i]); CurAbbrevs.back()->addRef(); } -- cgit v1.1 From 6ecccdbb2bf24a011b9c8ecbdd39be5a02269670 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 19 Feb 2013 11:29:21 +0000 Subject: [asan] instrument memory accesses with unusual sizes This patch makes asan instrument memory accesses with unusual sizes (e.g. 5 bytes or 10 bytes), e.g. long double or packed structures. Instrumentation is done with two 1-byte checks (first and last bytes) and if the error is found __asan_report_load_n(addr, real_size) or __asan_report_store_n(addr, real_size) is called. Also, call these two new functions in memset/memcpy instrumentation. asan-rt part will follow. 
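A rough C++ model of the checks the pass now emits inline for an odd-sized access; the sized report entry points are the ones this patch names, while the shadow offset here is a placeholder (the real value is platform-specific):

    #include <cstddef>
    #include <cstdint>

    // Runtime entry point added by this patch; the asan-rt side follows separately.
    extern "C" void __asan_report_load_n(uintptr_t Addr, size_t Size);

    static const uintptr_t kShadowOffset = 0x7fff8000; // placeholder

    // One-byte shadow check, as asan emits for an 8-bit access.
    static bool byteIsPoisoned(uintptr_t Addr) {
      int8_t K = *reinterpret_cast<int8_t *>((Addr >> 3) + kShadowOffset);
      return K != 0 && static_cast<int8_t>(Addr & 7) >= K;
    }

    // For an unusual-sized load, check only the first and last bytes, and
    // pass the real access size to the sized report function.
    static void checkLoadN(uintptr_t Addr, size_t Size) {
      if (byteIsPoisoned(Addr) || byteIsPoisoned(Addr + Size - 1))
        __asan_report_load_n(Addr, Size);
    }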
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175507 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 78 ++++++++++++++-------- 1 file changed, 49 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b97e342..1d59ba5 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -64,6 +64,8 @@ static const char *kAsanModuleCtorName = "asan.module_ctor"; static const char *kAsanModuleDtorName = "asan.module_dtor"; static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; +static const char *kAsanReportLoadN = "__asan_report_load_n"; +static const char *kAsanReportStoreN = "__asan_report_store_n"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; @@ -257,12 +259,14 @@ struct AddressSanitizer : public FunctionPass { return "AddressSanitizerFunctionPass"; } void instrumentMop(Instruction *I); - void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, - Value *Addr, uint32_t TypeSize, bool IsWrite); + void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, + Value *Addr, uint32_t TypeSize, bool IsWrite, + Value *SizeArgument); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, - bool IsWrite, size_t AccessSizeIndex); + bool IsWrite, size_t AccessSizeIndex, + Value *SizeArgument); bool instrumentMemIntrinsic(MemIntrinsic *MI); void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, Value *Size, @@ -300,6 +304,8 @@ struct AddressSanitizer : public FunctionPass { OwningPtr BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; + // This array is indexed by AccessIsWrite. + Function *AsanErrorCallbackSized[2]; InlineAsm *EmptyAsm; SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; @@ -548,21 +554,17 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { void AddressSanitizer::instrumentMemIntrinsicParam( Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { + IRBuilder<> IRB(InsertBefore); + if (Size->getType() != IntptrTy) + Size = IRB.CreateIntCast(Size, IntptrTy, false); // Check the first byte. - { - IRBuilder<> IRB(InsertBefore); - instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); - } + instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size); // Check the last byte. 
- { - IRBuilder<> IRB(InsertBefore); - Value *SizeMinusOne = IRB.CreateSub( - Size, ConstantInt::get(Size->getType(), 1)); - SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); - Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); - Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); - } + IRB.SetInsertPoint(InsertBefore); + Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1)); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne); + instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size); } // Instrument memset/memmove/memcpy @@ -641,14 +643,24 @@ void AddressSanitizer::instrumentMop(Instruction *I) { assert(OrigTy->isSized()); uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); - if (TypeSize != 8 && TypeSize != 16 && - TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { - // Ignore all unusual sizes. - return; - } + assert((TypeSize % 8) == 0); + // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check. + if (TypeSize == 8 || TypeSize == 16 || + TypeSize == 32 || TypeSize == 64 || TypeSize == 128) + return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0); + // Instrument unusual size (but still multiple of 8). + // We can not do it with a single check, so we do 1-byte check for the first + // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able + // to report the actual access size. IRBuilder<> IRB(I); - instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); + Value *LastByte = IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy), + ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), + OrigPtrTy); + Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); + instrumentAddress(I, I, Addr, 8, IsWrite, Size); + instrumentAddress(I, I, LastByte, 8, IsWrite, Size); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -664,10 +676,12 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { Instruction *AddressSanitizer::generateCrashCode( Instruction *InsertBefore, Value *Addr, - bool IsWrite, size_t AccessSizeIndex) { + bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) { IRBuilder<> IRB(InsertBefore); - CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], - Addr); + CallInst *Call = SizeArgument + ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument) + : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. // This EmptyAsm is required to avoid callback merge. 
@@ -694,8 +708,10 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, } void AddressSanitizer::instrumentAddress(Instruction *OrigIns, - IRBuilder<> &IRB, Value *Addr, - uint32_t TypeSize, bool IsWrite) { + Instruction *InsertBefore, + Value *Addr, uint32_t TypeSize, + bool IsWrite, Value *SizeArgument) { + IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Type *ShadowTy = IntegerType::get( @@ -727,8 +743,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, CrashTerm = SplitBlockAndInsertIfThen(cast(Cmp), true); } - Instruction *Crash = - generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); + Instruction *Crash = generateCrashCode( + CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument); Crash->setDebugLoc(OrigIns->getDebugLoc()); } @@ -997,6 +1013,10 @@ void AddressSanitizer::initializeCallbacks(Module &M) { FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } + AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); -- cgit v1.1 From 9f306bdc70757d11b6510525938c0d92c5529cc7 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 19 Feb 2013 11:35:39 +0000 Subject: Fix initialization-order bug in llvm::Support::TimeValue. TimeValue::now() is explicitly called during module initialization of lib/Support/Process.cpp. It reads the field of global object PosixZeroTime, which is not guaranteed to be initialized at this point. Found by AddressSanitizer with -fsanitize=init-order option. 
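The bug class here is the classic static initialization order problem: a dynamic initializer in one translation unit reads a global that another translation unit has not constructed yet. A distilled sketch with hypothetical names:

    // tu1.cpp
    struct TimeVal { long Sec; explicit TimeVal(long S) : Sec(S) {} };
    extern const TimeVal ZeroTime;          // defined in tu2.cpp
    const TimeVal Start(ZeroTime.Sec + 42); // may run before ZeroTime is
                                            // constructed and read garbage

    // tu2.cpp
    // const TimeVal ZeroTime(-946684800);

    // The commit follows the usual remedy: hoist the raw value into a constant
    // of integral type, which needs no dynamic initializer, and have both the
    // TimeVal object and any early callers read that constant instead.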
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175509 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/TimeValue.cpp | 9 +++++++-- lib/Support/Unix/TimeValue.inc | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp index 1a0f7bc..bd8af17 100644 --- a/lib/Support/TimeValue.cpp +++ b/lib/Support/TimeValue.cpp @@ -17,11 +17,16 @@ namespace llvm { using namespace sys; +const TimeValue::SecondsType + TimeValue::PosixZeroTimeSeconds = -946684800; +const TimeValue::SecondsType + TimeValue::Win32ZeroTimeSeconds = -12591158400ULL; + const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 ); const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 ); const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 ); -const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 ); -const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 ); +const TimeValue TimeValue::PosixZeroTime = TimeValue ( PosixZeroTimeSeconds,0 ); +const TimeValue TimeValue::Win32ZeroTime = TimeValue ( Win32ZeroTimeSeconds,0 ); void TimeValue::normalize( void ) { diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc index 5cf5a9d..df8558b 100644 --- a/lib/Support/Unix/TimeValue.inc +++ b/lib/Support/Unix/TimeValue.inc @@ -48,7 +48,8 @@ TimeValue TimeValue::now() { } return TimeValue( - static_cast( the_time.tv_sec + PosixZeroTime.seconds_ ), + static_cast( the_time.tv_sec + + PosixZeroTimeSeconds ), static_cast( the_time.tv_usec * NANOSECONDS_PER_MICROSECOND ) ); } -- cgit v1.1 From 403554c65847d3f42c5a01661ce494fb877f8ced Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 19 Feb 2013 15:22:42 +0000 Subject: R600: Fix tracking of implicit defs in the IndirectAddressing pass In some cases, we were losing track of live implicit registers which was creating dead defs and causing the scheduler to produce invalid code. NOTE: This is a candidate for the Mesa stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175516 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUIndirectAddressing.cpp | 32 ++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp index 56aaf23..15840b3 100644 --- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp +++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp @@ -169,9 +169,6 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { } if (RegisterAddressMap[Reg] == Address) { - if (!regHasExplicitDef(MRI, Reg)) { - continue; - } PhiRegisters.push_back(Reg); } } @@ -270,7 +267,8 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { // instruction that uses indirect addressing. BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), MI.getOperand(0).getReg()) - .addReg(AddrReg); + .addReg(AddrReg) + .addReg(Reg, RegState::Implicit); } } else { // Indirect register access @@ -292,8 +290,7 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { // We only need to use REG_SEQUENCE for explicit defs, since the // register coalescer won't do anything with the implicit defs. 
MachineInstr *DefInstr = MRI.getVRegDef(Reg); - if (!DefInstr->getOperand(0).isReg() || - DefInstr->getOperand(0).getReg() != Reg) { + if (!regHasExplicitDef(MRI, Reg)) { continue; } @@ -310,6 +307,7 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill); + Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit); } MI.eraseFromParent(); @@ -321,6 +319,26 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const { MachineInstr *DefInstr = MRI.getVRegDef(Reg); - return DefInstr && DefInstr->getOperand(0).isReg() && + + if (!DefInstr) { + return false; + } + + if (DefInstr->getOpcode() == AMDGPU::PHI) { + bool Explicit = false; + for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(), + E = DefInstr->operands_end(); + I != E; ++I) { + const MachineOperand &MO = *I; + if (!MO.isReg() || MO.isDef()) { + continue; + } + + Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg()); + } + return Explicit; + } + + return DefInstr->getOperand(0).isReg() && DefInstr->getOperand(0).getReg() == Reg; } -- cgit v1.1 From 23339b68e224cedac1e7025ba8bca3e3eb127fab Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 19 Feb 2013 15:22:44 +0000 Subject: R600: Fix scheduler crash caused by invalid MachinePointerInfo Kernel function arguments are lowered to loads from the PARAM_I address space. When creating these load instructions, we were initializing their MachinePointerInfo with an Argument object that was not attached to any function. This was causing the MachineScheduler to crash when it tried to access the parent of the Argument. This has been fixed by initializing the MachinePointerInfo with an UndefValue instead. NOTE: This is a candidate for the Mesa stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175517 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a7796b6..b5c2a93 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1039,7 +1039,7 @@ SDValue R600TargetLowering::LowerFormalArguments( AMDGPUAS::PARAM_I_ADDRESS); SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), DAG.getConstant(ParamOffsetBytes, MVT::i32), - MachinePointerInfo(new Argument(PtrTy)), + MachinePointerInfo(UndefValue::get(PtrTy)), ArgVT, false, false, ArgBytes); InVals.push_back(Arg); ParamOffsetBytes += ArgBytes; -- cgit v1.1 From 2b4b68d9365e7003fb7404aca2540b5372d7791b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 19 Feb 2013 15:22:45 +0000 Subject: R600: Mark all members of the TRegMem register class as reserved This stops the Machine Verifier from complaining about uses of undefined physical registers. NOTE: This is a candidate for the Mesa stable branch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600RegisterInfo.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index 33e858d..bbd7995 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -49,6 +49,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*I); } + for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(), + E = AMDGPU::TRegMemRegClass.end(); + I != E; ++I) { + Reserved.set(*I); + } + const R600InstrInfo *RII = static_cast(&TII); std::vector IndirectRegs = RII->getIndirectReservedRegs(MF); for (std::vector::iterator I = IndirectRegs.begin(), -- cgit v1.1 From e5839d0fc9999a3d53659354a3cceb838cb87711 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 19 Feb 2013 15:22:47 +0000 Subject: R600: Add AR_X to the R600_TReg_X register class. NOTE: This is a candidate for the Mesa stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175519 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600RegisterInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 0718854..ce5994c 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -81,7 +81,7 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", } // End isAllocatable = 0 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (sequence "T%u_X", 0, 127))>; + (add (sequence "T%u_X", 0, 127), AR_X)>; def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_Y", 0, 127))>; -- cgit v1.1 From 2e750c12e91ab09949ef1617ab3af14e1b6cd239 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 19 Feb 2013 15:27:05 +0000 Subject: ARM NEON: Merge a f32 bitcast of a v2i32 extractelt A vectorized sitofp on doubles will get scalarized to a sequence of an extract_element of <2 x i32>, a bitcast to f32 and a sitofp. Due to the extract_element and the bitcast we will unnecessarily generate moves between scalar and vector registers. The patch fixes this by using a COPY_TO_REGCLASS and an EXTRACT_SUBREG to extract the element from the vector instead. radar://13191881 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175520 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 901ff64..9f68c22 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5745,6 +5745,12 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Fold extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2i32 DPR:$src), DPR)), + (SSubReg_f32_reg imm:$lane)))>; + // Vector lengthening move with load, matching extending loads.
Example: -- cgit v1.1 From faec9a33968acf994892c65901bc6448f0034ecd Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 19 Feb 2013 16:38:32 +0000 Subject: Make pass name more precise and fix comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 61eb14e..bc7496b 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -1,4 +1,4 @@ -//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===// +//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// AT&T assembly code printer class. -// -//===----------------------------------------------------------------------===// #ifndef X86ASMPRINTER_H #define X86ASMPRINTER_H @@ -35,7 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { } virtual const char *getPassName() const LLVM_OVERRIDE { - return "X86 AT&T-Style Assembly Printer"; + return "X86 Assembly / Object Emitter"; } const X86Subtarget &getSubtarget() const { return *Subtarget; } -- cgit v1.1 From 26a5c4dd3176a7e3a7cc9601d32ccad8f41d3104 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 19 Feb 2013 16:47:59 +0000 Subject: Make ARMAsmPrinter pass name more precise and fix comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175527 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index f7392fb..c945e4f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===// +//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// ARM Assembly printer class. -// -//===----------------------------------------------------------------------===// #ifndef ARMASMPRINTER_H #define ARMASMPRINTER_H @@ -54,7 +50,7 @@ public: } virtual const char *getPassName() const LLVM_OVERRIDE { - return "ARM Assembly Printer"; + return "ARM Assembly / Object Emitter"; } void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, -- cgit v1.1 From 063337309e71683fc57c049c10d03d4f8a2ce356 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 19 Feb 2013 16:51:44 +0000 Subject: Fix GCMetadataPrinter::finishAssembly not executed, patch by Yiannis Tsiouris. Due to the execution order of doFinalization functions, the GC information was deleted before AsmPrinter::doFinalization was executed. Thus, GCMetadataPrinter::finishAssembly was never called. The patch fixes that by moving the code of GCInfoDeleter::doFinalization to Printer::doFinalization.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175528 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/GCMetadata.cpp | 41 +++------------------------------------ lib/CodeGen/LLVMTargetMachine.cpp | 2 -- 2 files changed, 3 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index a6a06e4..ef5247c 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -37,21 +37,9 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const; bool runOnFunction(Function &F); - }; - - class Deleter : public FunctionPass { - static char ID; - - public: - Deleter(); - - const char *getPassName() const; - void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnFunction(Function &F); bool doFinalization(Module &M); }; - + } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", @@ -182,32 +170,9 @@ bool Printer::runOnFunction(Function &F) { return false; } -// ----------------------------------------------------------------------------- - -char Deleter::ID = 0; - -FunctionPass *llvm::createGCInfoDeleter() { - return new Deleter(); -} - -Deleter::Deleter() : FunctionPass(ID) {} - -const char *Deleter::getPassName() const { - return "Delete Garbage Collector Information"; -} - -void Deleter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); -} - -bool Deleter::runOnFunction(Function &MF) { - return false; -} - -bool Deleter::doFinalization(Module &M) { +bool Printer::doFinalization(Module &M) { GCModuleInfo *GMI = getAnalysisIfAvailable(); - assert(GMI && "Deleter didn't require GCModuleInfo?!"); + assert(GMI && "Printer didn't require GCModuleInfo?!"); GMI->clear(); return false; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 12cd2d1..1a09837 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -226,7 +226,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(Printer); - PM.add(createGCInfoDeleter()); return false; } @@ -245,7 +244,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; addCodeEmitter(PM, JCE); - PM.add(createGCInfoDeleter()); return false; // success! } -- cgit v1.1 From e5a83d15b247cacbace981c22b793863f5328188 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 19 Feb 2013 17:14:33 +0000 Subject: Move LLVM_LIBRARY_VISIBILITY for consistency with what was done to PPCJITInfo.cpp in r175394. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175531 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index aeb05cc..44d8cce 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -339,7 +339,7 @@ extern "C" { /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -void LLVM_LIBRARY_VISIBILITY LLVMX86CompilationCallback2(intptr_t *StackPtr, +LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; // We are reading raw stack data here. Tell MemorySanitizer that it is -- cgit v1.1 From b1e1d5d4a575f5e5b4ceb7af68f33e75695ee959 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 19 Feb 2013 17:32:57 +0000 Subject: Clean up HiPE prologue emission a bit and avoid signed arithmetic tricks. 
No intended functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 90 +++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index c98c80d..2b3d853 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1433,7 +1433,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { bool Is64Bit = STI.is64Bit(); unsigned TlsReg, TlsOffset; DebugLoc DL; - const X86Subtarget *ST = &MF.getTarget().getSubtarget(); unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && @@ -1441,8 +1440,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); - if (!ST->isTargetLinux() && !ST->isTargetDarwin() && - !ST->isTargetWin32() && !ST->isTargetFreeBSD()) + if (!STI.isTargetLinux() && !STI.isTargetDarwin() && + !STI.isTargetWin32() && !STI.isTargetFreeBSD()) report_fatal_error("Segmented stacks not supported on this platform."); MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); @@ -1480,13 +1479,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // Read the limit off the current stacklet off the stack_guard location. if (Is64Bit) { - if (ST->isTargetLinux()) { + if (STI.isTargetLinux()) { TlsReg = X86::FS; TlsOffset = 0x70; - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. 
- } else if (ST->isTargetFreeBSD()) { + } else if (STI.isTargetFreeBSD()) { TlsReg = X86::FS; TlsOffset = 0x18; } else { @@ -1502,16 +1501,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { - if (ST->isTargetLinux()) { + if (STI.isTargetLinux()) { TlsReg = X86::GS; TlsOffset = 0x30; - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x48 + 90*4; - } else if (ST->isTargetWin32()) { + } else if (STI.isTargetWin32()) { TlsReg = X86::FS; TlsOffset = 0x14; // pvArbitrary, reserved for application use - } else if (ST->isTargetFreeBSD()) { + } else if (STI.isTargetFreeBSD()) { report_fatal_error("Segmented stacks not supported on FreeBSD i386."); } else { report_fatal_error("Segmented stacks not supported on this platform."); @@ -1523,10 +1522,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) .addImm(1).addReg(0).addImm(-StackSize).addReg(0); - if (ST->isTargetLinux() || ST->isTargetWin32()) { + if (STI.isTargetLinux() || STI.isTargetWin32()) { BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { // TlsOffset doesn't fit into a mod r/m byte so we need an extra register unsigned ScratchReg2; @@ -1632,19 +1631,18 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize(); + const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize(); const bool Is64Bit = STI.is64Bit(); DebugLoc DL; // HiPE-specific values const unsigned HipeLeafWords = 24; const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; const unsigned Guaranteed = HipeLeafWords * SlotSize; - const unsigned CallerStkArity = - std::max(0, MF.getFunction()->arg_size() - CCRegisteredArgs); - unsigned MaxStack = - MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize; + unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ? + MF.getFunction()->arg_size() - CCRegisteredArgs : 0; + unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize; - assert(MF.getTarget().getSubtarget().isTargetLinux() && + assert(STI.isTargetLinux() && "HiPE prologue is only supported on Linux operating systems."); // Compute the largest caller's frame that is needed to fit the callees' @@ -1660,31 +1658,37 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); MBBI != MBBE; ++MBBI) for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end(); - MI != ME; ++MI) - if (MI->isCall()) { - // Get callee operand. - const MachineOperand &MO = MI->getOperand(0); - const Function *F; - - // Only take account of global function calls (no closures etc.). 
- if (!MO.isGlobal()) continue; - if (!(F = dyn_cast(MO.getGlobal()))) continue; - - // Do not update 'MaxStack' for primitive and built-in functions - // (encoded with names either starting with "erlang."/"bif_" or not - // having a ".", such as a simple .., or an - // "_", such as the BIF "suspend_0") as they are executed on another - // stack. - if ((F->getName().find("erlang.") != std::string::npos) || - (F->getName().find("bif_") != std::string::npos)) continue; - if (F->getName().find_first_of("._") == std::string::npos) - continue; - - const uint64_t CalleeStkArity = - std::max(0, F->arg_size() - CCRegisteredArgs); - MoreStackForCalls = std::max( - MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); - } + MI != ME; ++MI) { + if (!MI->isCall()) + continue; + + // Get callee operand. + const MachineOperand &MO = MI->getOperand(0); + + // Only take account of global function calls (no closures etc.). + if (!MO.isGlobal()) + continue; + + const Function *F = dyn_cast(MO.getGlobal()); + if (!F) + continue; + + // Do not update 'MaxStack' for primitive and built-in functions + // (encoded with names either starting with "erlang."/"bif_" or not + // having a ".", such as a simple .., or an + // "_", such as the BIF "suspend_0") as they are executed on another + // stack. + if (F->getName().find("erlang.") != StringRef::npos || + F->getName().find("bif_") != StringRef::npos || + F->getName().find_first_of("._") == StringRef::npos) + continue; + + unsigned CalleeStkArity = + F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0; + if (HipeLeafWords - 1 > CalleeStkArity) + MoreStackForCalls = std::max(MoreStackForCalls, + (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); + } MaxStack += MoreStackForCalls; } -- cgit v1.1 From 383c6fc458ebd2bb7748483de56a97b68f3a9f2d Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 19 Feb 2013 18:18:36 +0000 Subject: Hexagon: Sync TSFlags in MCTargetDesc/HexagonBaseInfo.h with HexagonInstrFormats.td. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 88 +++++++++++++++-------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 9fc826f..5f9718b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -17,6 +17,9 @@ #ifndef HEXAGONBASEINFO_H #define HEXAGONBASEINFO_H +#include "HexagonMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + namespace llvm { /// HexagonII - This namespace holds all of the target specific flags that @@ -28,19 +31,19 @@ namespace HexagonII { // Insn types. // *** Must match HexagonInstrFormat*.td *** enum Type { - TypePSEUDO = 0, - TypeALU32 = 1, - TypeCR = 2, - TypeJR = 3, - TypeJ = 4, - TypeLD = 5, - TypeST = 6, - TypeSYSTEM = 7, - TypeXTYPE = 8, - TypeMEMOP = 9, - TypeNV = 10, - TypePREFIX = 30, // Such as extenders. - TypeMARKER = 31 // Such as end of a HW loop. + TypePSEUDO = 0, + TypeALU32 = 1, + TypeCR = 2, + TypeJR = 3, + TypeJ = 4, + TypeLD = 5, + TypeST = 6, + TypeSYSTEM = 7, + TypeXTYPE = 8, + TypeMEMOP = 9, + TypeNV = 10, + TypePREFIX = 30, // Such as extenders. + TypeENDLOOP = 31 // Such as end of a HW loop. 
}; enum SubTarget { @@ -65,6 +68,14 @@ namespace HexagonII { BaseRegOffset = 5 // Indirect with register offset }; + enum MemAccessSize { + NoMemAccess = 0, // Not a memory acces instruction. + ByteAccess = 1, // Byte access instruction (memb). + HalfWordAccess = 2, // Half word access instruction (memh). + WordAccess = 3, // Word access instrution (memw). + DoubleWordAccess = 4 // Double word access instruction (memd) + }; + // MCInstrDesc TSFlags // *** Must match HexagonInstrFormat*.td *** enum { @@ -79,46 +90,67 @@ namespace HexagonII { // Predicated instructions. PredicatedPos = 6, PredicatedMask = 0x1, - PredicatedNewPos = 7, + PredicatedFalsePos = 7, + PredicatedFalseMask = 0x1, + PredicatedNewPos = 8, PredicatedNewMask = 0x1, - // Stores that can be newified. - mayNVStorePos = 8, + // New-Value consumer instructions. + NewValuePos = 9, + NewValueMask = 0x1, + + // New-Value producer instructions. + hasNewValuePos = 10, + hasNewValueMask = 0x1, + + // Which operand consumes or produces a new value. + NewValueOpPos = 11, + NewValueOpMask = 0x7, + + // Which bits encode the new value. + NewValueBitsPos = 14, + NewValueBitsMask = 0x3, + + // Stores that can become new-value stores. + mayNVStorePos = 16, mayNVStoreMask = 0x1, - // Dot new value store instructions. - NVStorePos = 9, + // New-value store instructions. + NVStorePos = 17, NVStoreMask = 0x1, // Extendable insns. - ExtendablePos = 10, + ExtendablePos = 18, ExtendableMask = 0x1, // Insns must be extended. - ExtendedPos = 11, + ExtendedPos = 19, ExtendedMask = 0x1, // Which operand may be extended. - ExtendableOpPos = 12, + ExtendableOpPos = 20, ExtendableOpMask = 0x7, // Signed or unsigned range. - ExtentSignedPos = 15, + ExtentSignedPos = 23, ExtentSignedMask = 0x1, // Number of bits of range before extending operand. - ExtentBitsPos = 16, + ExtentBitsPos = 24, ExtentBitsMask = 0x1f, // Valid subtargets - validSubTargetPos = 21, + validSubTargetPos = 29, validSubTargetMask = 0xf, - // Addressing mode for load/store instructions - AddrModePos = 25, - AddrModeMask = 0xf + // Addressing mode for load/store instructions. + AddrModePos = 33, + AddrModeMask = 0x7, - }; + // Access size of memory access instructions (load/store). + MemAccessSizePos = 36, + MemAccesSizeMask = 0x7 + }; // *** The code above must match HexagonInstrFormat*.td *** // -- cgit v1.1 From da5f1ed6406442735fbb4421f8a22bc8a41d4c57 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 19 Feb 2013 18:41:01 +0000 Subject: Assert that the target provided hints are in the allocation order. Target implementations of getRegAllocationHints() should use the provided allocation order, and they can never return hints outside the order. This is already documented in TargetRegisterInfo.h. 
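A target-side sketch of the contract (generic C++; the helper name is hypothetical): before forwarding a hint, membership in the current allocation order must be checked, which is exactly the filtering the follow-up ARM fix below adds.

    #include <algorithm>
    #include <vector>

    typedef unsigned MCPhysReg; // stand-in for llvm::MCPhysReg

    void addHintIfAllocatable(MCPhysReg Hint,
                              const std::vector<MCPhysReg> &Order,
                              std::vector<MCPhysReg> &Hints) {
      // A hint outside Order would now trip the new assert in AllocationOrder.
      if (Hint && std::find(Order.begin(), Order.end(), Hint) != Order.end())
        Hints.push_back(Hint);
    }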
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175540 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AllocationOrder.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 1d09d20..3fa1f8f 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -44,4 +44,9 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, dbgs() << '\n'; } }); +#ifndef NDEBUG + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() && + "Target hint is outside allocation order."); +#endif } -- cgit v1.1 From 4fd4c91c40fa40ae4cd671b03056de8c3c961046 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 19 Feb 2013 18:55:36 +0000 Subject: ARM: Allocation hints must make sure to be in the alloc order. When creating an allocation hint for a register pair, make sure the hint for the physical register reference is still in the allocation order. rdar://13240556 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175541 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 0deafae..db33d54 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -205,7 +205,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, } // First prefer the paired physreg. - if (PairedPhys) + if (PairedPhys && + std::find(Order.begin(), Order.end(), PairedPhys) != Order.end()) Hints.push_back(PairedPhys); // Then prefer even or odd registers. -- cgit v1.1 From 2b49dec41bff103925d63b57f79b076daad4179e Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 19 Feb 2013 18:57:53 +0000 Subject: Don't trust st_size of a character device. This fixes using /dev/stdin as an input when stdin is connected to a tty, for example. No test, because it's difficult to write a reasonably portable test for this. /dev/stdin isn't a character device when stdin is redirected from a file or connected to a pipe. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175542 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/MemoryBuffer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 65b4332..b0a2085 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -322,9 +322,9 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, return error_code(errno, posix_category()); } - // If this is a named pipe, we can't trust the size. Create the memory - // buffer by copying off the stream. - if (S_ISFIFO(FileInfo.st_mode)) { + // If this is a named pipe or character device, we can't trust the size. + // Create the memory buffer by copying off the stream. + if (S_ISFIFO(FileInfo.st_mode) || S_ISCHR(FileInfo.st_mode)) { return getMemoryBufferForStream(FD, Filename, result); } -- cgit v1.1 From a8eae3e35866329f2191a22a34421faa61448508 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 19 Feb 2013 19:36:55 +0000 Subject: Whitelist files and block devices instead of blacklisting fifos and character devices. 
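Taken together, the two MemoryBuffer changes implement one rule: trust st_size only for regular files and block devices. A stand-alone sketch of that predicate (POSIX; the wrapper function is hypothetical):

    #include <sys/stat.h>

    // True if fstat's st_size is meaningful for pre-sizing a buffer. Pipes,
    // ttys and other character devices report useless sizes (often 0), so
    // callers should fall back to reading them as a stream.
    bool canTrustFileSize(int FD) {
      struct stat FileInfo;
      if (fstat(FD, &FileInfo) == -1)
        return false;
      return S_ISREG(FileInfo.st_mode) || S_ISBLK(FileInfo.st_mode);
    }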
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/MemoryBuffer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index b0a2085..0d5d4d7 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -322,9 +322,10 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, return error_code(errno, posix_category()); } - // If this is a named pipe or character device, we can't trust the size. - // Create the memory buffer by copying off the stream. - if (S_ISFIFO(FileInfo.st_mode) || S_ISCHR(FileInfo.st_mode)) { + // If this is not a file or a block device (e.g. it's a named pipe + // or character device), we can't trust the size. Create the memory + // buffer by copying off the stream. + if (!S_ISREG(FileInfo.st_mode) && !S_ISBLK(FileInfo.st_mode)) { return getMemoryBufferForStream(FD, Filename, result); } -- cgit v1.1 From a175396816a9b28835acfe2cd07250881f1fee6c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 19 Feb 2013 19:36:59 +0000 Subject: Fix a bug that was found by the clang static analyzer. The var "AT" is null so we can't deref it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175550 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/ConstantFold.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 587b7ce..a5a9d9f 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -846,8 +846,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, else if (ArrayType *AT = dyn_cast(Agg->getType())) NumElts = AT->getNumElements(); else - NumElts = AT->getVectorNumElements(); - + NumElts = Agg->getType()->getVectorNumElements(); + SmallVector Result; for (unsigned i = 0; i != NumElts; ++i) { Constant *C = Agg->getAggregateElement(i); -- cgit v1.1 From 03544ec2a43fab162d25cf44627d1d08430bcccd Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 19 Feb 2013 20:02:09 +0000 Subject: Fix a bug in mayHaveSideEffects. Calls to functions that do not return are now considered instructions with side effects. rdar://13227456 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175553 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Instruction.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 42df5d7..2b5a0b3 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -455,14 +455,18 @@ bool Instruction::mayWriteToMemory() const { } } -/// mayThrow - Return true if this instruction may throw an exception.
-/// bool Instruction::mayThrow() const { if (const CallInst *CI = dyn_cast(this)) return !CI->doesNotThrow(); return isa(this); } +bool Instruction::mayReturn() const { + if (const CallInst *CI = dyn_cast(this)) + return !CI->doesNotReturn(); + return true; +} + /// isAssociative - Return true if the instruction is associative: /// /// Associative operators satisfy: x op (y op z) === (x op y) op z -- cgit v1.1 From 909a0e0e95e2b96f5d07d40fb53c5892bea20f83 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 19 Feb 2013 20:16:45 +0000 Subject: ARM NEON: Don't need COPY_TO_REGCLASS in pattern In my previous commit: "Merge a f32 bitcast of a v2i32 extractelt A vectorized sitofp on doubles will get scalarized to a sequence of an extract_element of <2 x i32>, a bitcast to f32 and a sitofp. Due to the extract_element and the bitcast, we will unnecessarily generate moves between scalar and vector registers." I added a pattern containing a copy_to_regclass. The copy_to_regclass is actually not needed. radar://13191881 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9f68c22..0411ac4 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5747,9 +5747,7 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Fold extracting an element out of a v2i32 into a vfp register. def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), - (f32 (EXTRACT_SUBREG - (v2f32 (COPY_TO_REGCLASS (v2i32 DPR:$src), DPR)), - (SSubReg_f32_reg imm:$lane)))>; + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; // Vector lengthening move with load, matching extending loads. -- cgit v1.1 From 608e3554847ef35995b7310ba8acd43ab861e3f0 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 19 Feb 2013 21:54:59 +0000 Subject: Add obvious constantness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175560 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6f13186..00fbe69 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -280,13 +280,13 @@ namespace { /// getTargetMachine - Return a reference to the TargetMachine, casted /// to the target-specific type. - const X86TargetMachine &getTargetMachine() { + const X86TargetMachine &getTargetMachine() const { return static_cast(TM); } /// getInstrInfo - Return a reference to the TargetInstrInfo, casted /// to the target-specific type. - const X86InstrInfo *getInstrInfo() { + const X86InstrInfo *getInstrInfo() const { return getTargetMachine().getInstrInfo(); } }; -- cgit v1.1 From 77afbdce53aa740777486b0cc4e9df151ae65468 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 19 Feb 2013 21:57:35 +0000 Subject: ELF symbol table field st_other support, excluding visibility bits. Generic STO handling at the Target level. The st_other field of the ELF symbol table is one byte in size. The first 2 bits are used for generic visibility and are currently handled by llvm. The other six bits are processor-specific and need to be set at the target level.
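As a quick illustration of that layout (a sketch, not the MCELF code; the helper names are made up), the low two bits of st_other carry the STV_* visibility and the upper six carry the processor-specific STO_* bits:

#include <cassert>

// st_other layout per the description above: bits [1:0] = visibility,
// bits [7:2] = processor-specific "other" bits.
static unsigned getVisibility(unsigned StOther) { return StOther & 0x3; }
static unsigned getOther(unsigned StOther) { return (StOther >> 2) & 0x3f; }
static unsigned packStOther(unsigned Visibility, unsigned Other) {
  return (Visibility & 0x3) | ((Other & 0x3f) << 2);
}

int main() {
  // 0x20 is just an example six-bit value (a full-byte STO define of 0x80
  // shifted down by two, matching the packing used later in this series).
  unsigned st_other = packStOther(/*STV_DEFAULT*/ 0, /*STO bits*/ 0x20);
  assert(getVisibility(st_other) == 0 && getOther(st_other) == 0x20);
  return 0;
}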
A couple of notes: The new static methods for accessing and setting the "other" flags in include/llvm/MC/MCELF.h match the style guide and not the other methods in the file. I don't like the inconsistency, but feel I should follow the prescribed lowerUpper() convention. STO_ value definitions are not specified in gnu land as consistently as the STT_ and STB_ fields. Probably because the latter were defined in a standards doc and the former defined partially in code. I have stuck with the full byte definition of the flags. Contributor: Zoran Jovanovic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175561 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 11 ++++++++--- lib/MC/MCELF.cpp | 15 +++++++++++++++ lib/MC/MCELFStreamer.cpp | 4 ++++ lib/MC/MCStreamer.cpp | 5 +++++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index fb14dc9..0b97f27 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -546,12 +546,17 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF, bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() || Data.getSymbol().isVariable(); + // Binding and Type share the same byte as upper and lower nibbles. uint8_t Binding = MCELF::GetBinding(OrigData); - uint8_t Visibility = MCELF::GetVisibility(OrigData); uint8_t Type = MCELF::GetType(Data); - uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift); - uint8_t Other = Visibility; + + // Other and Visibility share the same byte, with Visibility using the lower + // 2 bits. + uint8_t Visibility = MCELF::GetVisibility(OrigData); + uint8_t Other = MCELF::getOther(OrigData) << + (ELF_Other_Shift - ELF_STV_Shift); + Other |= Visibility; uint64_t Value = SymbolValue(Data, Layout); uint64_t Size = 0; diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp index 4db2846..560cdbc 100644 --- a/lib/MC/MCELF.cpp +++ b/lib/MC/MCELF.cpp @@ -52,6 +52,8 @@ unsigned MCELF::GetType(const MCSymbolData &SD) { return Type; } +// Visibility is stored in the first two bits of st_other. +// st_other values are stored in the second byte of get/setFlags. void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) { assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); @@ -68,4 +70,17 @@ unsigned MCELF::GetVisibility(MCSymbolData &SD) { return Visibility; } +// Other is stored in the last six bits of st_other. +// st_other values are stored in the second byte of get/setFlags. +void MCELF::setOther(MCSymbolData &SD, unsigned Other) { + uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_Other_Shift); + SD.setFlags(OtherFlags | (Other << ELF_Other_Shift)); +} + +unsigned MCELF::getOther(MCSymbolData &SD) { + unsigned Other = + (SD.getFlags() & (0x3f << ELF_Other_Shift)) >> ELF_Other_Shift; + return Other; +} + } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 8ddbfbb..a9c35cc 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -504,6 +504,10 @@ void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) { llvm_unreachable("Generic ELF doesn't support this directive"); } +MCSymbolData &MCELFStreamer::getOrCreateSymbolData(MCSymbol *Symbol) { + return getAssembler().getOrCreateSymbolData(*Symbol); +} + void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { llvm_unreachable("ELF doesn't support this directive"); } diff --git a/lib/MC/MCStreamer.cpp
b/lib/MC/MCStreamer.cpp index 7d79d62..9857f7b 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -620,3 +620,8 @@ void MCStreamer::Finish() { FinishImpl(); } + +MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) { + report_fatal_error("Not supported!"); + return *(static_cast (NULL)); +} -- cgit v1.1 From ba6f722d6a80efeacb69c12f9322d858affb4d2b Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 19 Feb 2013 22:02:21 +0000 Subject: Fix typos. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175562 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 173f25f..19265db 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -621,7 +621,7 @@ private: } // Disable SRoA for any intrinsics except for lifetime invariants. - // FIXME: What about debug instrinsics? This matches old behavior, but + // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. void visitIntrinsicInst(IntrinsicInst &II) { if (!IsOffsetKnown) @@ -1277,7 +1277,7 @@ namespace { /// 1) It takes allocations of aggregates and analyzes the ways in which they /// are used to try to split them into smaller allocations, ideally of /// a single scalar data type. It will split up memcpy and memset accesses -/// as necessary and try to isolate invidual scalar accesses. +/// as necessary and try to isolate individual scalar accesses. /// 2) It will transform accesses into forms which are suitable for SSA value /// promotion. This can be replacing a memset with a scalar store of an /// integer value, or it can involve speculating operations on a PHI or @@ -1483,7 +1483,7 @@ private: PN.getName() + ".sroa.speculated"); // Get the TBAA tag and alignment to use from one of the loads. It doesn't - // matter which one we get and if any differ, it doesn't matter. + // matter which one we get and if any differ. LoadInst *SomeLoad = cast(Loads.back()); MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); unsigned Align = SomeLoad->getAlignment(); @@ -1816,7 +1816,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// The strategy for finding the more natural GEPs is to peel off layers of the /// pointer, walking back through bit casts and GEPs, searching for a base /// pointer from which we can compute a natural GEP with the desired -/// properities. The algorithm tries to fold as many constant indices into +/// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, @@ -2062,9 +2062,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, uint64_t Size = TD.getTypeStoreSize(AllocaTy); - // Check the uses to ensure the uses are (likely) promoteable integer uses. + // Check the uses to ensure the uses are (likely) promotable integer uses. // Also ensure that the alloca has a covering load or store. We don't want - // to widen the integer operotains only to fail to promote due to some other + // to widen the integer operations only to fail to promote due to some other // unsplittable entry (which we may make splittable later). 
bool WholeAllocaOp = false; for (; I != E; ++I) { @@ -2283,7 +2283,7 @@ class AllocaPartitionRewriter : public InstVisitorBeginOffset == 0 && @@ -3590,7 +3590,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet &DeletedAllocas) { /// If there is a domtree available, we attempt to promote using the full power /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is /// based on the SSAUpdater utilities. This function returns whether any -/// promotion occured. +/// promotion occurred. bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; -- cgit v1.1 From ccb3c9c2702f548fd0a7d60a622e6f4fdf0940e7 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 19 Feb 2013 22:04:37 +0000 Subject: ELF symbol table field st_other support, excluding visibility bits. Mips (MicroMips) specific STO handling. The st_other field setting for STO_MIPS_MICROMIPS. Contributor: Zoran Jovanovic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175564 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 20 +++++++++++++++++++- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 6 +++++- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 10 ++++++---- lib/Target/Mips/MipsAsmPrinter.cpp | 6 ++++++ 4 files changed, 36 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 9c454d6..ebcbf9d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -9,6 +9,9 @@ #include "MCTargetDesc/MipsELFStreamer.h" #include "MipsSubtarget.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -58,7 +61,22 @@ namespace llvm { llvm_unreachable("Unsupported relocation model for e_flags"); MCA.setELFHeaderEFlags(EFlags); + } + + // For llc. Set a symbol's STO flags. + void + MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget, + MCSymbol *Sym, + unsigned Val) { + if (hasRawTextSupport()) + return; + MCSymbolData &Data = getOrCreateSymbolData(Sym); + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2); } -} + +} // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 360dbe1..b10ccc7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -12,7 +12,9 @@ #include "llvm/MC/MCELFStreamer.h" namespace llvm { +class MipsAsmPrinter; class MipsSubtarget; +class MCSymbol; class MipsELFStreamer : public MCELFStreamer { public: @@ -24,7 +26,9 @@ public: ~MipsELFStreamer() {} void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget); -// void emitELFHeaderFlagCG(unsigned Val); + void emitMipsSTOCG(const MipsSubtarget &Subtarget, + MCSymbol *Sym, + unsigned Val); static bool classof(const MCStreamer *S) { return S->getKind() == SK_MipsELFStreamer; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 9f2d1e4..ff3fd90 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -35,11 +35,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter { void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; MCContext &Ctx; + const MCSubtargetInfo &STI; bool IsLittleEndian; public: - MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) : - MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} + MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, + const MCSubtargetInfo &sti, bool IsLittle) : + MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} @@ -95,7 +97,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, Ctx, false); + return new MipsMCCodeEmitter(MCII, Ctx, STI, false); } MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, @@ -103,7 +105,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, Ctx, true); + return new MipsMCCodeEmitter(MCII, Ctx, STI, true); } /// EncodeInstruction - Emit the instruction. diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 003d890..1876cb6 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -36,6 +36,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" @@ -231,6 +232,11 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips")); OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); } + + if (Subtarget->inMicroMipsMode()) + if (MipsELFStreamer *MES = dyn_cast(&OutStreamer)) + MES->emitMipsSTOCG(*Subtarget, CurrentFnSym, + (unsigned)ELF::STO_MIPS_MICROMIPS); OutStreamer.EmitLabel(CurrentFnSym); } -- cgit v1.1 From 4263ed33a77a3679545e784da1ab26e79032a467 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 19 Feb 2013 22:06:38 +0000 Subject: Remove unneeded #includes. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175565 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 19265db..1e74d74 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -43,14 +43,12 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" -- cgit v1.1 From c989c61798783f99abe7f8c27baf76bd2aea5067 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 19 Feb 2013 22:14:34 +0000 Subject: ELF symbol table field st_other support, excluding visibility bits. Mips (Mips16) specific e_header setting. EF_MIPS_ARCH_ASE_M16 needs to be set in the ELF header flags for Mips16. Contributor: Reed Kotler git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index ebcbf9d..e12b176 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -36,7 +36,10 @@ namespace llvm { MCAssembler& MCA = getAssembler(); unsigned EFlags = MCA.getELFHeaderEFlags(); - EFlags |= ELF::EF_MIPS_NOREORDER; + if (Subtarget.inMips16Mode()) + EFlags |= ELF::EF_MIPS_ARCH_ASE_M16; + else + EFlags |= ELF::EF_MIPS_NOREORDER; // Architecture if (Subtarget.hasMips64r2()) -- cgit v1.1 From 50573b1c274d290a81ab76331104206ea0b09bde Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 19 Feb 2013 22:14:45 +0000 Subject: Minor cleanups. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175567 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1e74d74..8231c8b 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -409,9 +409,9 @@ static Value *foldSelectInst(SelectInst &SI) { // early on. if (ConstantInt *CI = dyn_cast(SI.getCondition())) return SI.getOperand(1+CI->isZero()); - if (SI.getOperand(1) == SI.getOperand(2)) { + if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - } + return 0; } @@ -1139,8 +1139,7 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { - for (const_use_iterator UI = use_begin(I), UE = use_end(I); - UI != UE; ++UI) { + for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { if (!UI->U) continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " @@ -1240,7 +1239,7 @@ public: for (SmallVector::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = 0; if (StoreInst *SI = dyn_cast(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1437,8 +1436,7 @@ private: // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; - ++Idx) { + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); Value *InVal = PN.getIncomingValue(Idx); @@ -3146,9 +3144,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. - Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices, - Name + ".gep"), - Name + ".load"); + Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); } -- cgit v1.1 From bcff7b77348e299d25e7ada9ea5bffb80aab55b2 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 19 Feb 2013 22:17:58 +0000 Subject: Remove unused variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 8231c8b..e90fe90 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1167,8 +1167,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const { } OS << "Partitioning of alloca: " << AI << "\n"; - unsigned Num = 0; - for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { print(OS, I); printUsers(OS, I); } -- cgit v1.1 From 5cdeca8b1d726790fe9687bc4a4d615d299bc151 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 19 Feb 2013 22:29:00 +0000 Subject: ELF symbol table field st_other support, excluding visibility bits. Mips (o32 abi) specific e_header setting. EF_MIPS_ABI_O32 needs to be set in the ELF header flags for o32 abi output. 
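The e_flags handling across these Mips commits is plain bit accumulation into the ELF header's e_flags word. A hedged standalone sketch of that pattern (the constants carry their conventional MIPS ELF values, but verify them against llvm/Support/ELF.h; computeEFlags is a made-up name, not the streamer API):

#include <stdint.h>
#include <stdio.h>

// Conventional MIPS e_flags bits (values assumed, not taken from this patch).
static const uint32_t EF_MIPS_NOREORDER    = 0x00000001;
static const uint32_t EF_MIPS_PIC          = 0x00000002;
static const uint32_t EF_MIPS_ABI_O32      = 0x00001000;
static const uint32_t EF_MIPS_MICROMIPS    = 0x02000000;
static const uint32_t EF_MIPS_ARCH_ASE_M16 = 0x04000000;

// Mirrors the accumulation in emitELFHeaderFlagsCG: each subtarget
// property ORs one bit into the header flags.
static uint32_t computeEFlags(bool mips16, bool microMips, bool o32, bool pic) {
  uint32_t EFlags = 0;
  EFlags |= mips16 ? EF_MIPS_ARCH_ASE_M16 : EF_MIPS_NOREORDER;
  if (microMips) EFlags |= EF_MIPS_MICROMIPS;
  if (o32)       EFlags |= EF_MIPS_ABI_O32;
  if (pic)       EFlags |= EF_MIPS_PIC;
  return EFlags;
}

int main() {
  printf("e_flags = 0x%08x\n", computeEFlags(false, true, true, true));
  return 0;
}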
Contributor: Reed Kotler git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index e12b176..c33bc9a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -54,6 +54,10 @@ namespace llvm { if (Subtarget.inMicroMipsMode()) EFlags |= ELF::EF_MIPS_MICROMIPS; + // ABI + if (Subtarget.isABI_O32()) + EFlags |= ELF::EF_MIPS_ABI_O32; + // Relocation Model Reloc::Model RM = Subtarget.getRelocationModel(); if (RM == Reloc::PIC_ || RM == Reloc::Default) -- cgit v1.1 From 2f87fed2479017589051a5a99277e4278f6372e3 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 19 Feb 2013 22:38:58 +0000 Subject: Update a portability kludge to keep it in sync with changes in the code which uses it. This is not ideal, but it ought to at least restore the behavior to what it was before. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175571 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/MemoryBuffer.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 0d5d4d7..691b6f5 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -33,8 +33,13 @@ #include #else #include -#ifndef S_ISFIFO -#define S_ISFIFO(x) (0) +// Simplistic definitions of these macros to allow files to be read with +// MapInFilePages. +#ifndef S_ISREG +#define S_ISREG(x) (1) +#endif +#ifndef S_ISBLK +#define S_ISBLK(x) (0) #endif #endif #include -- cgit v1.1 From b86f1e5e557f8a00209eef1c6ecb4532b33d7738 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 19 Feb 2013 23:50:45 +0000 Subject: [ms-inline asm] Force the use of a base pointer if the MachineFunction includes MS-style inline assembly. This is a follow-on to r175334. Forcing a FP to be emitted doesn't ensure it will be used. Therefore, force the base pointer as well. We now treat MS inline assembly in the same way we treat functions with dynamic stack realignment and VLAs. This guarantees the BP will be used to reference parameters and locals. rdar://13218191 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 401eefb..fbe6331 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -389,8 +389,10 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; // When we need stack realignment and there are dynamic allocas, we can't - // reference off of the stack pointer, so we reserve a base pointer. + // reference off of the stack pointer, so we reserve a base pointer. This + // is also true if the function contains MS-style inline assembly.
+ if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) || + MF.hasMSInlineAsm()) return true; return false; -- cgit v1.1 From 2bb471f2fa3709f5861f20016ff9d24f0098abe4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Feb 2013 00:04:41 +0000 Subject: Add the function attributes from an inline asm call. These don't have declarations that set the attribute groups, so we must do it on our own. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175577 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 17d49ac..d130a25 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -553,6 +553,14 @@ void SlotTracker::processFunction() { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (MDNode *N = dyn_cast_or_null(I->getOperand(i))) CreateMetadataSlot(N); + + // Add all the call attributes to the table. This is important for + // inline ASM, which may have attributes but no declaration. + if (CI->isInlineAsm()) { + AttributeSet Attrs = CI->getAttributes().getFnAttributes(); + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) + CreateAttributeSetSlot(Attrs); + } } // Process metadata attached with this instruction. -- cgit v1.1 From 9e7924d206ded7cc46e1a6e71ad2574bec4d5057 Mon Sep 17 00:00:00 2001 From: Pedro Artigas Date: Wed, 20 Feb 2013 00:10:29 +0000 Subject: clear new map and initialize new variable git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175578 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCContext.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index a074003..1a7df60 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -83,6 +83,8 @@ void MCContext::reset() { DwarfDebugFlags = StringRef(); MCLineSections.clear(); MCLineSectionOrder.clear(); + DwarfCompileUnitID = 0; + MCLineTableSymbols.clear(); CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0); // If we have the MachO uniquing map, free it. -- cgit v1.1 From 9bc2c994827f2ff881d0563f0c14134b794b4928 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Wed, 20 Feb 2013 00:17:42 +0000 Subject: Move part of APInt implementation from header to cpp file. These methods already require calls into the cpp file, so we wouldn't gain anything by keeping them inline.
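The moved bodies (shown in the diff that follows) all reduce the signed operations to their unsigned counterparts by handling signs first. A minimal sketch of the same reduction on plain 64-bit integers (illustration only, not the APInt code; the function name is made up):

#include <stdint.h>
#include <cassert>

// Signed division in terms of unsigned division, mirroring APInt::sdiv:
// strip the signs, divide the magnitudes, then re-apply the sign.
static int64_t sdiv_via_udiv(int64_t a, int64_t b) {
  assert(b != 0 && "division by zero");
  // Compute magnitudes in unsigned arithmetic to avoid overflow on negation.
  uint64_t ua = a < 0 ? (uint64_t)0 - (uint64_t)a : (uint64_t)a;
  uint64_t ub = b < 0 ? (uint64_t)0 - (uint64_t)b : (uint64_t)b;
  uint64_t uq = ua / ub;
  bool negative = (a < 0) != (b < 0);
  return negative ? -(int64_t)uq : (int64_t)uq;
}

int main() {
  assert(sdiv_via_udiv(-7, 2) == -3);  // truncates toward zero, like APInt
  assert(sdiv_via_udiv(7, -2) == -3);
  assert(sdiv_via_udiv(-7, -2) == 3);
  return 0;
}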
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175579 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/APInt.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 61e503b..07cb057 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1876,6 +1876,17 @@ APInt APInt::udiv(const APInt& RHS) const { return Quotient; } +APInt APInt::sdiv(const APInt &RHS) const { + if (isNegative()) { + if (RHS.isNegative()) + return (-(*this)).udiv(-RHS); + return -((-(*this)).udiv(RHS)); + } + if (RHS.isNegative()) + return -(this->udiv(-RHS)); + return this->udiv(RHS); +} + APInt APInt::urem(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { @@ -1913,6 +1924,17 @@ APInt APInt::urem(const APInt& RHS) const { return Remainder; } +APInt APInt::srem(const APInt &RHS) const { + if (isNegative()) { + if (RHS.isNegative()) + return -((-(*this)).urem(-RHS)); + return -((-(*this)).urem(RHS)); + } + if (RHS.isNegative()) + return this->urem(-RHS); + return this->urem(RHS); +} + void APInt::udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder) { // Get some size facts about the dividend and divisor @@ -1953,6 +1975,24 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); } +void APInt::sdivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + if (LHS.isNegative()) { + if (RHS.isNegative()) + APInt::udivrem(-LHS, -RHS, Quotient, Remainder); + else { + APInt::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient = -Quotient; + } + Remainder = -Remainder; + } else if (RHS.isNegative()) { + APInt::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient = -Quotient; + } else { + APInt::udivrem(LHS, RHS, Quotient, Remainder); + } +} + APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { APInt Res = *this+RHS; Overflow = isNonNegative() == RHS.isNonNegative() && -- cgit v1.1 From 751bc8d4c9ee4298449fed264571ffc162852e06 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Wed, 20 Feb 2013 00:26:25 +0000 Subject: Fix #includes, so we include only what we really need. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175581 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 76067a1..2ff37e0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -15,8 +15,8 @@ #ifndef SCHEDULEDAGSDNODES_H #define SCHEDULEDAGSDNODES_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. -- cgit v1.1 From 59d580c8d664448f01ea5652ea51b90e0b7e84f6 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Wed, 20 Feb 2013 00:31:54 +0000 Subject: Add missing #include. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCMCInstLower.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 73f7a2c..942c873 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -- cgit v1.1 From 65692c809efa46337bf80f12b1795e785a6e7207 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Wed, 20 Feb 2013 05:45:15 +0000 Subject: Expand pseudos/macros: SltCCRxRy16, SltiCCRxImmX16, SltiuCCRxImmX16, SltuCCRxRy16 $T8 shows up as register $24 when emitted from C++ code so we had to change some tests that were already there for this functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175593 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 56 +++++++++++++++++++++++++++++++++++++ lib/Target/Mips/Mips16InstrInfo.h | 14 ++++++++++ lib/Target/Mips/Mips16InstrInfo.td | 50 ++++++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 22cb963..eacc8fc 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -184,6 +184,18 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::RetRA16: ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; + case Mips::SltCCRxRy16: + ExpandFEXT_CCRX16_ins(MBB, MI, Mips::SltRxRy16); + break; + case Mips::SltiCCRxImmX16: + ExpandFEXT_CCRXI16_ins(MBB, MI, Mips::SltiRxImm16, Mips::SltiRxImmX16); + break; + case Mips::SltiuCCRxImmX16: + ExpandFEXT_CCRXI16_ins(MBB, MI, Mips::SltiuRxImm16, Mips::SltiuRxImmX16); + break; + case Mips::SltuCCRxRy16: + ExpandFEXT_CCRX16_ins(MBB, MI, Mips::SltuRxRy16); + break; } MBB.erase(MI); @@ -474,6 +486,30 @@ void Mips16InstrInfo::ExpandFEXT_T8I8I16_ins( BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target); } +void Mips16InstrInfo::ExpandFEXT_CCRX16_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SltOpc) const { + unsigned CC = I->getOperand(0).getReg(); + unsigned regX = I->getOperand(1).getReg(); + unsigned regY = I->getOperand(2).getReg(); + BuildMI(MBB, I, I->getDebugLoc(), get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(MBB, I, I->getDebugLoc(), + get(Mips::MoveR3216), CC).addReg(Mips::T8); + +} +void Mips16InstrInfo::ExpandFEXT_CCRXI16_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SltiOpc, unsigned SltiXOpc) const { + unsigned CC = I->getOperand(0).getReg(); + unsigned regX = I->getOperand(1).getReg(); + int64_t Imm = I->getOperand(2).getImm(); + unsigned SltOpc = whichOp8u_or_16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(MBB, I, I->getDebugLoc(), get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(MBB, I, I->getDebugLoc(), + get(Mips::MoveR3216), CC).addReg(Mips::T8); + +} + const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { if (validSpImm8(Imm)) return get(Mips::AddiuSpImm16); @@ -487,6 +523,26 @@ void Mips16InstrInfo::BuildAddiuSpImm BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm); } +unsigned Mips16InstrInfo::whichOp8_or_16uimm 
+ (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isUInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +unsigned Mips16InstrInfo::whichOp8u_or_16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { return new Mips16InstrInfo(TM); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 2699a1c..0048fff 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -123,6 +123,20 @@ private: MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc) const; + void ExpandFEXT_CCRX16_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SltOpc) const; + + void ExpandFEXT_CCRXI16_ins( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SltiOpc, unsigned SltiXOpc) const; + + static unsigned + whichOp8_or_16uimm (unsigned shortOp, unsigned longOp, int64_t Imm); + + static unsigned + whichOp8u_or_16simm (unsigned shortOp, unsigned longOp, int64_t Imm); + }; } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 0d90df4..1cb4a0e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -59,7 +59,16 @@ class FRI16_ins_base op, string asmstr, string asmstr2, class FRI16_ins op, string asmstr, InstrItinClass itin>: FRI16_ins_base; - + +class FRI16R_ins_base op, string asmstr, string asmstr2, + InstrItinClass itin>: + FRI16; + +class FRI16R_ins op, string asmstr, + InstrItinClass itin>: + FRI16R_ins_base; + class F2RI16_ins _op, string asmstr, InstrItinClass itin>: FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), @@ -140,6 +149,15 @@ class FEXT_RI16_ins _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>; +class FEXT_RI16R_ins_base _op, string asmstr, string asmstr2, + InstrItinClass itin>: + FEXT_RI16<_op, (outs ), (ins CPU16Regs:$rx, simm16:$imm), + !strconcat(asmstr, asmstr2), [], itin>; + +class FEXT_RI16R_ins _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16R_ins_base<_op, asmstr, "\t$rx, $imm", itin>; + class FEXT_RI16_PC_ins _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>; @@ -384,7 +402,7 @@ class SeliT: // // class SelT: - MipsPseudo16<(outs CPU16Regs:$rd_), + MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, CPU16Regs:$rl, CPU16Regs:$rr), !strconcat(op2, @@ -692,6 +710,13 @@ def LhuRxRyOffMemX16: // // Format: LI rx, immediate MIPS16e +// Purpose: Load Immediate +// To load a constant into a GPR. +// +def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>; + +// +// Format: LI rx, immediate MIPS16e // Purpose: Load Immediate (Extended) // To load a constant into a GPR. // @@ -1017,7 +1042,7 @@ def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; // To record the result of a less-than comparison with a constant. // // -def SltiRxImm16: FRI16_ins<0b01010, "slti", IIAlu> { +def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> { let Defs = [T8]; } @@ -1027,7 +1052,7 @@ def SltiRxImm16: FRI16_ins<0b01010, "slti", IIAlu> { // To record the result of a less-than comparison with a constant. 
// // -def SltiRxImmX16: FEXT_RI16_ins<0b01010, "slti", IIAlu> { +def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> { let Defs = [T8]; } @@ -1038,7 +1063,7 @@ def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">; // To record the result of a less-than comparison with a constant. // // -def SltiuRxImm16: FRI16_ins<0b01011, "sltiu", IIAlu> { +def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> { let Defs = [T8]; } @@ -1048,7 +1073,7 @@ def SltiuRxImm16: FRI16_ins<0b01011, "sltiu", IIAlu> { // To record the result of a less-than comparison with a constant. // // -def SltiuRxImmX16: FEXT_RI16_ins<0b01011, "sltiu", IIAlu> { +def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> { let Defs = [T8]; } // @@ -1063,7 +1088,9 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">; // Purpose: Set on Less Than // To record the result of a less-than comparison. // -def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>; +def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>{ + let Defs = [T8]; +} def SltCCRxRy16: FCCRR16_ins<"slt">; @@ -1071,10 +1098,13 @@ def SltCCRxRy16: FCCRR16_ins<"slt">; // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison. // -def SltuRxRy16: FRR16_ins<0b00011, "sltu", IIAlu>; +def SltuRxRy16: FRR16_ins<0b00011, "sltu", IIAlu>{ + let Defs = [T8]; +} def SltuRxRyRz16: FRRTR16_ins<"sltu"> { let isCodeGenOnly=1; + let Defs = [T8]; } @@ -1648,7 +1678,7 @@ def: Mips16Pat // def: Mips16Pat <(setle CPU16Regs:$lhs, CPU16Regs:$rhs), - (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>; + (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImm16 1))>; // // setlt @@ -1708,7 +1738,7 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), // hi/lo relocs -def : Mips16Pat<(MipsHi tglobaladdr:$in), +def : Mips16Pat<(MipsHi tglobaladdr:$in), (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>; def : Mips16Pat<(MipsHi tjumptable:$in), (SllX16 (LiRxImmX16 tjumptable:$in), 16)>; -- cgit v1.1 From 4f659eccafe34efea2a4ba6e57ad09977e9157c2 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 06:46:28 +0000 Subject: Add preservation of SlotIndexes to PHIElimination. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175596 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index abad5c1..37e2ee1 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -116,6 +116,7 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); -- cgit v1.1 From 2650a82cc4166237f698e3bbc315739e43180431 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 06:46:32 +0000 Subject: Avoid recomputing an inserted instruction's SlotIndex. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175597 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 37e2ee1..061c385 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -306,10 +306,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Update LiveIntervals for the new copy or implicit def. 
if (LIS) { MachineInstr *NewInstr = prior(AfterPHIsIt); - LIS->InsertMachineInstrInMaps(NewInstr); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); - SlotIndex DestCopyIndex = LIS->getInstructionIndex(NewInstr); if (IncomingReg) { // Add the region from the beginning of MBB to the copy instruction to // IncomingReg's live interval. -- cgit v1.1 From 767e04307f70cad9e2ba46b3812504ee1d8c1721 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 06:46:34 +0000 Subject: Remove verification after PHIElimination when using LiveIntervals, and move it after the two-address pass. The remaining problems in 'make check' are occurring later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175598 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 3 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 061c385..61122fc 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -172,9 +172,6 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { ImpDefs.clear(); VRegPHIUseCount.clear(); - if (LIS) - MF.verify(this, "After PHI elimination"); - return Changed; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 8e6f809..f5d41c7 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1437,6 +1437,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } } + if (LIS) + MF->verify(this, "After two-address instruction pass"); + return MadeChange; } -- cgit v1.1 From 349cf349987a65be89078913e34126898bcdd138 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 06:46:41 +0000 Subject: Add SlotIndexes::repairIndexesInRange(), which repairs SlotIndexes after adding and removing instructions. The implementation seems more complicated than it needs to be, but I couldn't find something simpler that dealt with all of the corner cases. Also add a call to repairIndexesInRange() from repairIntervalsInRange(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175601 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 2 ++ lib/CodeGen/SlotIndexes.cpp | 61 ++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 0978d73..7b1eed2 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1044,6 +1044,8 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, else startIdx = getInstructionIndex(prior(Begin)).getRegSlot(); + Indexes->repairIndexesInRange(MBB, Begin, End); + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { unsigned Reg = OrigRegs[i]; if (!TargetRegisterInfo::isVirtualRegister(Reg)) diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 95faafa..b4e562e 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -142,6 +142,67 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { ++NumLocalRenum; } +// Repair indexes after adding and removing instructions. 
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End) { + bool includeStart = (Begin == MBB->begin()); + SlotIndex startIdx; + if (includeStart) + startIdx = getMBBStartIdx(MBB); + else + startIdx = getInstructionIndex(Begin); + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB); + else + endIdx = getInstructionIndex(End); + + // FIXME: Conceptually, this code is implementing an iterator on MBB that + // optionally includes an additional position prior to MBB->begin(), indicated + // by the includeStart flag. This is done so that we can iterate MIs in a MBB + // in parallel with SlotIndexes, but there should be a better way to do this. + IndexList::iterator ListB = startIdx.listEntry(); + IndexList::iterator ListI = endIdx.listEntry(); + MachineBasicBlock::iterator MBBI = End; + bool pastStart = false; + while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { + assert(ListI->getIndex() >= startIdx.getIndex() && + (includeStart || !pastStart) && + "Decremented past the beginning of region to repair."); + + MachineInstr *SlotMI = ListI->getInstr(); + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0; + bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); + + if (SlotMI == MI && !MBBIAtBegin) { + --ListI; + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) { + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else { + --ListI; + if (SlotMI) + removeMachineInstrFromMaps(SlotMI); + } + } + + // In theory this could be combined with the previous loop, but it is tricky + // to update the IndexList while we are iterating it. + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (mi2iMap.find(MI) == mi2iMap.end()) + insertMachineInstrInMaps(MI); + } +} #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SlotIndexes::dump() const { -- cgit v1.1 From 6cf93d740a600024f2de924614a4d4d0dc1cb852 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 06:46:46 +0000 Subject: Move the computation of the IsEarlyClobber flag into its own loop, since the correct value is needed in every iteration of the loop for updating LiveIntervals. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175603 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index f5d41c7..cf14b4d 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1215,6 +1215,11 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, TiedPairList &TiedPairs, unsigned &Dist) { bool IsEarlyClobber = false; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second); + IsEarlyClobber |= DstMO.isEarlyClobber(); + } + bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; @@ -1225,7 +1230,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, const MachineOperand &DstMO = MI->getOperand(DstIdx); unsigned RegA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); // Grab RegB from the instruction because it may have changed if the // instruction was commuted. 
-- cgit v1.1 From 9030fc22dd73684901ecb749c9688e289bd1a777 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 06:46:48 +0000 Subject: Add support to the two-address pass for updating LiveIntervals in many of the common transformations. This includes updating repairIntervalsInRange() to handle more cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175604 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 51 ++++++++++++++++++------ lib/CodeGen/TwoAddressInstructionPass.cpp | 65 +++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 7b1eed2..8177db6 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1038,20 +1038,36 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, ArrayRef OrigRegs) { - SlotIndex startIdx; - if (Begin == MBB->begin()) - startIdx = getMBBStartIdx(MBB); + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB).getPrevSlot(); else - startIdx = getInstructionIndex(prior(Begin)).getRegSlot(); + endIdx = getInstructionIndex(End); Indexes->repairIndexesInRange(MBB, Begin, End); + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && + TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + !hasInterval(MOI->getReg())) { + LiveInterval &LI = getOrCreateInterval(MOI->getReg()); + computeVirtRegInterval(&LI); + } + } + } + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { unsigned Reg = OrigRegs[i]; if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; LiveInterval &LI = getInterval(Reg); + LiveInterval::iterator LII = LI.FindLiveRangeContaining(endIdx); + for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr *MI = I; @@ -1063,13 +1079,26 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!MO.isReg() || MO.getReg() != Reg) continue; - assert(MO.isUse() && "Register defs are not yet supported."); - - if (!LI.liveAt(instrIdx)) { - LiveRange *LR = LI.getLiveRangeContaining(startIdx); - assert(LR && "Used registers must be live-in."); - LR->end = instrIdx.getRegSlot(); - break; + if (MO.isDef()) { + assert(LII != LI.end() && + "Dead register defs are not yet supported."); + if (!Indexes->getInstructionFromIndex(LII->start)) { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange LR = LiveRange(instrIdx.getRegSlot(), LII->start, VNI); + LII = LI.addRange(LR); + } + } else if (MO.isUse()) { + if (LII == LI.end()) + --LII; + + assert(LII->start < instrIdx && + "Registers with multiple used live ranges are not yet supported."); + SlotIndex endIdx = LII->end; + if (!endIdx.isBlock() && !Indexes->getInstructionFromIndex(endIdx)) + LII->end = instrIdx.getRegSlot(); } } } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index cf14b4d..0da6662 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1149,7 +1149,29 @@ tryInstructionTransform(MachineBasicBlock::iterator 
&mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } + + MachineBasicBlock::iterator Begin; + MachineBasicBlock::iterator End; + SmallVector OrigRegs; + if (LIS) { + Begin = MachineBasicBlock::iterator(NewMIs[0]); + if (Begin != MBB->begin()) + --Begin; + End = next(MachineBasicBlock::iterator(MI)); + + for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg()) + OrigRegs.push_back(MOI->getReg()); + } + } + MI.eraseFromParent(); + + // Update LiveIntervals. + if (LIS) + LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs); + mi = NewMIs[1]; if (TransformSuccess) return true; @@ -1223,6 +1245,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; + SlotIndex LastCopyIdx; unsigned RegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; @@ -1267,9 +1290,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, DistanceMap.insert(std::make_pair(PrevMI, Dist)); DistanceMap[MI] = ++Dist; - SlotIndex CopyIdx; - if (Indexes) - CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + if (LIS) { + LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + LiveInterval &LI = LIS->getInterval(RegA); + VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + SlotIndex endIdx = + LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); + LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI)); + } + } DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); @@ -1315,6 +1346,18 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, LV->addVirtualRegisterKilled(RegB, PrevMI); } + // Update LiveIntervals. + if (LIS) { + LiveInterval &LI = LIS->getInterval(RegB); + SlotIndex MIIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(MIIdx); + assert(I != LI.end() && "RegB must be live-in to use."); + + SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); + if (I->end == UseIdx) + LI.removeRange(LastCopyIdx, UseIdx); + } + } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so // regB is still used in this instruction, but a kill flag was @@ -1469,6 +1512,13 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { llvm_unreachable(0); } + SmallVector OrigRegs; + if (LIS) { + OrigRegs.push_back(MI->getOperand(0).getReg()); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) + OrigRegs.push_back(MI->getOperand(i).getReg()); + } + bool DefEmitted = false; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { MachineOperand &UseMO = MI->getOperand(i); @@ -1512,6 +1562,8 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DEBUG(dbgs() << "Inserted: " << *CopyMI); } + MachineBasicBlock::iterator EndMBBI = next(MachineBasicBlock::iterator(MI)); + if (!DefEmitted) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); @@ -1521,4 +1573,11 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); } + + // Update LiveIntervals.
+ if (LIS) { + if (MBBI != MBB->begin()) + --MBBI; + LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs); + } } -- cgit v1.1 From 7ab6c76ad1cbf36284ca5b6bd5ee33c625fe3e60 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Feb 2013 07:21:42 +0000 Subject: Modify the LLVM assembly output so that it uses references to represent function attributes. This makes the LLVM assembly look better. E.g.: define void @foo() #0 { ret void } attributes #0 = { nounwind noinline ssp } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index d130a25..3f32ac1 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -66,22 +66,21 @@ static const Module *getModuleFromVal(const Value *V) { return 0; } -static void PrintCallingConv(unsigned cc, raw_ostream &Out) -{ +static void PrintCallingConv(unsigned cc, raw_ostream &Out) { switch (cc) { - case CallingConv::Fast: Out << "fastcc"; break; - case CallingConv::Cold: Out << "coldcc"; break; - case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; - case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; - case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; - case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; - case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; - case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; - case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc"; break; - case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break; - case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; - case CallingConv::PTX_Device: Out << "ptx_device"; break; - default: Out << "cc" << cc; break; + default: Out << "cc" << cc; break; + case CallingConv::Fast: Out << "fastcc"; break; + case CallingConv::Cold: Out << "coldcc"; break; + case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; + case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; + case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; + case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; + case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; + case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; + case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; + case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break; + case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; + case CallingConv::PTX_Device: Out << "ptx_device"; break; } } @@ -510,6 +509,7 @@ void SlotTracker::processModule() { CreateModuleSlot(I); // Add all the function attributes to the table. + // FIXME: Add attributes of other objects? 
AttributeSet FnAttrs = I->getAttributes().getFnAttributes(); if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex)) CreateAttributeSetSlot(FnAttrs); @@ -1662,7 +1662,7 @@ void AssemblyWriter::printFunction(const Function *F) { if (F->hasUnnamedAddr()) Out << " unnamed_addr"; if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) - Out << ' ' << Attrs.getAsString(AttributeSet::FunctionIndex); + Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes()); if (F->hasSection()) { Out << " section \""; PrintEscapedString(F->getSection(), Out); -- cgit v1.1 From 5f645953555cee528cd1c0d6faa16d9b89ebba48 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 20 Feb 2013 07:39:18 +0000 Subject: Fix the (clang -Werror) build by removing an unused member variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index ff3fd90..96f93a0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -35,13 +35,12 @@ class MipsMCCodeEmitter : public MCCodeEmitter { void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; MCContext &Ctx; - const MCSubtargetInfo &STI; bool IsLittleEndian; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, const MCSubtargetInfo &sti, bool IsLittle) : - MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {} + MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} -- cgit v1.1 From fdf45175a8444c421c03627c139777d1de48e516 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 20 Feb 2013 07:39:20 +0000 Subject: Fully qualify llvm::next to avoid ambiguity when building as C++11. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175608 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveInterval.cpp | 2 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 68f4b16..74793be 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -440,7 +440,7 @@ void LiveInterval::join(LiveInterval &Other, iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - for (iterator I = next(OutIt), E = end(); I != E; ++I) { + for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) { VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; assert(nextValNo != 0 && "Huh?"); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0da6662..99d3607 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1157,7 +1157,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, Begin = MachineBasicBlock::iterator(NewMIs[0]); if (Begin != MBB->begin()) --Begin; - End = next(MachineBasicBlock::iterator(MI)); + End = llvm::next(MachineBasicBlock::iterator(MI)); for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), MOE = MI.operands_end(); MOI != MOE; ++MOI) { @@ -1562,7 +1562,8 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DEBUG(dbgs() << "Inserted: " << *CopyMI); } - MachineBasicBlock::iterator EndMBBI = next(MachineBasicBlock::iterator(MI)); + MachineBasicBlock::iterator EndMBBI = + llvm::next(MachineBasicBlock::iterator(MI)); if (!DefEmitted) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); -- cgit v1.1 From af3a5420aeb421e83694745c53141683caa123cb Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Wed, 20 Feb 2013 12:21:33 +0000 Subject: Fix thumbv5e frame lowering assertion failure. It is possible that the frame pointer is not found in the callee-saved info, thus FramePtrSpillFI may be incorrect if we don't check the result of hasFP(MF). Besides, if we enable the stack coloring algorithm, there will be an assertion to ensure the slot is live. But in the test case, %var1 is not live in the prologue of the function, and we will get an assertion failure. Note: There is similar code in ARMFrameLowering.cpp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175616 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Thumb1FrameLowering.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 123ada6..98efc16 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -124,14 +124,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + bool HasFP = hasFP(MF); + if (HasFP) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; // Adjust FP so it point to the stack slot that contains the previous FP. 
- if (hasFP(MF)) { + if (HasFP) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlags(MachineInstr::FrameSetup)); @@ -146,7 +149,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); - if (STI.isTargetELF() && hasFP(MF)) + if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); -- cgit v1.1 From 1479c9bb392325688b72e5829bbb7939c4a079a4 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 20 Feb 2013 12:35:15 +0000 Subject: [asan] instrument invoke insns with noreturn attribute (as well as call insns) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175617 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 1d59ba5..f116657 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/InstVisitor.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" @@ -1130,12 +1131,12 @@ bool AddressSanitizer::runOnFunction(Function &F) { } else if (isa(BI) && ClMemIntrin) { // ok, take it. } else { - if (CallInst *CI = dyn_cast(BI)) { + CallSite CS(BI); + if (CS) { // A call inside BB. TempsToInstrument.clear(); - if (CI->doesNotReturn()) { - NoReturnCalls.push_back(CI); - } + if (CS.doesNotReturn()) + NoReturnCalls.push_back(CS.getInstruction()); } continue; } -- cgit v1.1 From 52981c4b6016d9f0e295e0771ec0a50dd073b4b3 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 20 Feb 2013 12:42:54 +0000 Subject: I optimized the following patterns: sext <4 x i1> to <4 x i64> sext <4 x i8> to <4 x i64> sext <4 x i16> to <4 x i64> I'm running Combine on SIGN_EXTEND_IN_REG and reverting SEXT patterns: (sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) The sext_in_reg (v4i32 x) may be lowered to shl+sar operations. The "sar" does not exist for 64-bit elements, so lowering sext_in_reg (v4i64 x) has no vector solution. I also added costs for these operations to the AVX costs table. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175619 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 37 +++++++++++++++++++++++++++++++ lib/Target/X86/X86TargetTransformInfo.cpp | 3 +++ 2 files changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9ed03cd..a227166 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1323,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); @@ -17076,6 +17077,41 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (!VT.isVector()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT ExtraVT = cast(N1)->getVT(); + DebugLoc dl = N->getDebugLoc(); + + // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the + // both SSE and AVX2 since there is no sign-extended shift right + // operation on a vector with 64-bit elements. + //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> + // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) + if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND)) { + SDValue N00 = N0.getOperand(0); + + // EXTLOAD has a better solution on AVX2, + // it may be replaced with X86ISD::VSEXT node. 
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256()) + if (!ISD::isNormalLoad(N00.getNode())) + return SDValue(); + + if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { + SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, + N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); + } + } + return SDValue(); +} + static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -17468,6 +17504,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index f3dfa0e..fefb479 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -232,6 +232,9 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; -- cgit v1.1 From ad4da0fc321230261b4d0387f0ec216eb8aa50ca Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 20 Feb 2013 13:00:06 +0000 Subject: Move the SplatByte helper to APInt and generalize it a bit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175621 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 20 ++++---------------- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +++------------- 2 files changed, 7 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a9d40d0..f085e44 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2539,18 +2539,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { } } -/// SplatByte - Distribute ByteVal over NumBits bits. -// FIXME: Move this helper to a common place. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// ExpandBitCount - Expand the specified bitcount instruction into operations. 
/// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2568,10 +2556,10 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); - SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); - SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); - SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); + SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 09885d8..0640311 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3377,17 +3377,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } -/// SplatByte - Distribute ByteVal over NumBits bits. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3396,7 +3385,8 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast(Value)) { - APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) return DAG.getConstant(Val, VT); return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); @@ -3406,7 +3396,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. - APInt Magic = SplatByte(NumBits, 0x01); + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } -- cgit v1.1 From b34c79e4bbe5accbb54d0291e8bef5d2bfef32e4 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 20 Feb 2013 15:50:31 +0000 Subject: Fix PR15155: lost vadd/vsplat optimization. During lowering of a BUILD_VECTOR, we look for opportunities to use a vector splat. When the splatted value fits in 5 signed bits, a single splat does the job. When it doesn't fit in 5 bits but does fit in 6, and is an even value, we can splat on half the value and add the result to itself. This last optimization hasn't been working recently because of improved constant folding. To circumvent this, create a pseudo VADD_SPLAT that can be expanded during instruction selection. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175632 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 30 ++++++++++++++++++++++++++++++ lib/Target/PowerPC/PPCISelLowering.cpp | 16 ++++++++-------- lib/Target/PowerPC/PPCISelLowering.h | 5 +++++ 3 files changed, 43 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0f943e8..01d731a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1322,6 +1322,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA); } + case PPCISD::VADD_SPLAT: { + // Convert: VADD_SPLAT elt, size + // Into: tmp = VSPLTIS[BHW] elt + // VADDU[BHW]M tmp, tmp + // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + assert(isa(N->getOperand(0)) && + isa(N->getOperand(1)) && + "Invalid operand on VADD_SPLAT!"); + int EltSize = N->getConstantOperandVal(1); + unsigned Opc1, Opc2; + EVT VT; + if (EltSize == 1) { + Opc1 = PPC::VSPLTISB; + Opc2 = PPC::VADDUBM; + VT = MVT::v16i8; + } else if (EltSize == 2) { + Opc1 = PPC::VSPLTISH; + Opc2 = PPC::VADDUHM; + VT = MVT::v8i16; + } else { + assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); + Opc1 = PPC::VSPLTISW; + Opc2 = PPC::VADDUWM; + VT = MVT::v4i32; + } + SDValue Elt = getI32Imm(N->getConstantOperandVal(0)); + SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, Elt); + SDValue TmpVal = SDValue(Tmp, 0); + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + } } return SelectCode(N); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index e789112..f3ef38a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -594,6 +594,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; + case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; } } @@ -5020,14 +5021,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If this value is in the range [-32,30] and is even, use: // tmp = VSPLTI[bhw], result = add tmp, tmp if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { - // FIXME: This is currently disabled because the ADD will be folded back - // into an invalid BUILD_VECTOR immediately. - return SDValue(); -#if 0 - SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); - Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); -#endif + // To avoid having the optimization undone by constant folding, we + // convert to a pseudo that will be expanded later. + SDValue Elt = DAG.getConstant(SextVal >> 1, MVT::i32); + EVT VT = Op.getValueType(); + int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); + SDValue EltSize = DAG.getConstant(Size, MVT::i32); + return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 12b3df7..7cc2d1a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -237,6 +237,11 @@ namespace llvm { /// sym@got@dtprel@l. 
ADDI_DTPREL_L, + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// into an ADD of a VSPLTI with itself during instruction selection. + /// Necessary to avoid losing this optimization due to constant folds. + VADD_SPLAT, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, -- cgit v1.1 From d6c98ae63824854ea2175b362a10985cac7cfb32 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Wed, 20 Feb 2013 16:13:27 +0000 Subject: Hexagon: Move HexagonMCInst.h to MCTargetDesc/HexagonMCInst.h. Add HexagonMCInst class which adds various Hexagon VLIW annotations. In addition, this class also includes some APIs related to the constant extenders. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/Hexagon.h | 5 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 14 +- lib/Target/Hexagon/HexagonMCInstLower.cpp | 4 +- .../Hexagon/InstPrinter/HexagonInstPrinter.cpp | 44 ++++-- .../Hexagon/InstPrinter/HexagonInstPrinter.h | 17 +- lib/Target/Hexagon/InstPrinter/LLVMBuild.txt | 2 +- lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 3 +- lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 175 +++++++++++++++++++++ lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 100 ++++++++++++ .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 4 +- 10 files changed, 336 insertions(+), 32 deletions(-) create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h (limited to 'lib') diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 7e7f756..dfbefc8 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -21,9 +21,10 @@ namespace llvm { class FunctionPass; + class ModulePass; class TargetMachine; class MachineInstr; - class MCInst; + class HexagonMCInst; class HexagonAsmPrinter; class HexagonTargetMachine; class raw_ostream; @@ -54,7 +55,7 @@ namespace llvm { TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); */ - void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, + void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI, HexagonAsmPrinter &AP); } // end namespace llvm; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 58b89d1..88cd3fb 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -14,12 +14,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "HexagonAsmPrinter.h" #include "Hexagon.h" -#include "HexagonMCInst.h" +#include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" -#include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "MCTargetDesc/HexagonMCInst.h" #include "InstPrinter/HexagonInstPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -220,8 +220,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); for (unsigned Index = 0; Index < Size; Index++) { HexagonMCInst MCI; - MCI.setStartPacket(Index == 0); - MCI.setEndPacket(Index == (Size-1)); + MCI.setPacketStart(Index == 0); + MCI.setPacketEnd(Index == (Size-1)); HexagonLowerToMC(BundleMIs[Index], MCI, *this); OutStreamer.EmitInstruction(MCI); @@ -230,8 +230,8 @@ void 
HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { else { HexagonMCInst MCI; if (MI->getOpcode() == Hexagon::ENDLOOP0) { - MCI.setStartPacket(true); - MCI.setEndPacket(true); + MCI.setPacketStart(true); + MCI.setPacketEnd(true); } HexagonLowerToMC(MI, MCI, *this); OutStreamer.EmitInstruction(MCI); diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index db36ac0..f011d51 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -15,6 +15,7 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInst.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/MC/MCExpr.h" @@ -38,9 +39,10 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI, +void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI, HexagonAsmPrinter& AP) { MCI.setOpcode(MI->getOpcode()); + MCI.setDesc(MI->getDesc()); for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index c700354..36da6df 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "HexagonInstPrinter.h" -#include "Hexagon.h" #include "HexagonAsmPrinter.h" -#include "HexagonMCInst.h" +#include "Hexagon.h" +#include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonMCInst.h" +#include "llvm/MC/MCInst.h" #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" #include @@ -28,6 +28,8 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" +const char HexagonInstPrinter::PacketPadding = '\t'; + StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } @@ -43,43 +45,42 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot) { - const char packetPadding[] = " "; const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. if (MI->getOpcode() == Hexagon::ENDLOOP0) { // Ending a harware loop is different from ending an regular packet. - assert(MI->isEndPacket() && "Loop end must also end the packet"); + assert(MI->isPacketEnd() && "Loop-end must also end the packet"); - if (MI->isStartPacket()) { + if (MI->isPacketStart()) { // There must be a packet to end a loop. // FIXME: when shuffling is always run, this shouldn't be needed. HexagonMCInst Nop; StringRef NoAnnot; Nop.setOpcode (Hexagon::NOP); - Nop.setStartPacket (MI->isStartPacket()); + Nop.setPacketStart (MI->isPacketStart()); printInst (&Nop, O, NoAnnot); } // Close the packet. - if (MI->isEndPacket()) - O << packetPadding << endPacket; + if (MI->isPacketEnd()) + O << PacketPadding << endPacket; printInstruction(MI, O); } else { // Prefix the insn opening the packet. 
- if (MI->isStartPacket()) - O << packetPadding << startPacket << '\n'; + if (MI->isPacketStart()) + O << PacketPadding << startPacket << '\n'; printInstruction(MI, O); // Suffix the insn closing the packet. - if (MI->isEndPacket()) + if (MI->isPacketEnd()) // Suffix the packet in a new line always, since the GNU assembler has // issues with a closing brace on the same line as CONST{32,64}. - O << '\n' << packetPadding << endPacket; + O << '\n' << PacketPadding << endPacket; } printAnnotation(O, Annot); @@ -102,12 +103,23 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { - O << MI->getOperand(OpNo).getImm(); + const MCOperand& MO = MI->getOperand(OpNo); + + if(MO.isExpr()) { + O << *MO.getExpr(); + } else if(MO.isImm()) { + O << MI->getOperand(OpNo).getImm(); + } else { + llvm_unreachable("Unknown operand"); + } } void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { - O << MI->getOperand(OpNo).getImm(); + const HexagonMCInst *HMCI = static_cast(MI); + if (HMCI->isConstExtended()) + O << "#"; + printOperand(MI, OpNo, O); } void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h index 902a323..d0cef68 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h @@ -14,16 +14,18 @@ #ifndef HEXAGONINSTPRINTER_H #define HEXAGONINSTPRINTER_H -#include "HexagonMCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" namespace llvm { + class HexagonMCInst; + class HexagonInstPrinter : public MCInstPrinter { public: explicit HexagonInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MII, MRI) {} + : MCInstPrinter(MAI, MII, MRI), MII(MII) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); @@ -65,10 +67,19 @@ namespace llvm { void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { printSymbol(MI, OpNo, O, false); } - bool isConstExtended(const MCInst *MI) const; + const MCInstrInfo &getMII() const { + return MII; + } + protected: void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) const; + + static const char PacketPadding; + + private: + const MCInstrInfo &MII; + }; } // end namespace llvm diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt index 8678401..59849aa 100644 --- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt +++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = HexagonAsmPrinter parent = Hexagon -required_libraries = MC Support +required_libraries = HexagonDesc MC Support add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 8e3da99..62b9b60 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMHexagonDesc - HexagonMCTargetDesc.cpp HexagonMCAsmInfo.cpp + HexagonMCInst.cpp + HexagonMCTargetDesc.cpp ) add_dependencies(LLVMHexagonDesc HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp 
b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp new file mode 100644 index 0000000..9260b4a --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp @@ -0,0 +1,175 @@ +//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInst to allow some Hexagon VLIW annotations. +// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" + +using namespace llvm; + +// Return the slots used by the insn. +unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const { + const HexagonInstrInfo* QII = TM->getInstrInfo(); + const InstrItineraryData* II = TM->getInstrItineraryData(); + const InstrStage* + IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass()); + + return (IS->getUnits()); +} + +// Return the Hexagon ISA class for the insn. +unsigned HexagonMCInst::getType() const { + const uint64_t F = MCID->TSFlags; + + return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); +} + +// Return whether the insn is an actual insn. +bool HexagonMCInst::isCanon() const { + return (!MCID->isPseudo() && + !isPrefix() && + getType() != HexagonII::TypeENDLOOP); +} + +// Return whether the insn is a prefix. +bool HexagonMCInst::isPrefix() const { + return (getType() == HexagonII::TypePREFIX); +} + +// Return whether the insn is solo, i.e., cannot be in a packet. +bool HexagonMCInst::isSolo() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); +} + +// Return whether the insn is a new-value consumer. +bool HexagonMCInst::isNewValue() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Return whether the instruction is a legal new-value producer. +bool HexagonMCInst::hasNewValue() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +// Return the operand that consumes or produces a new value. +const MCOperand& HexagonMCInst::getNewValue() const { + const uint64_t F = MCID->TSFlags; + const unsigned O = (F >> HexagonII::NewValueOpPos) & + HexagonII::NewValueOpMask; + const MCOperand& MCO = getOperand(O); + + assert ((isNewValue() || hasNewValue()) && MCO.isReg()); + return (MCO); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInst::isConstExtended(void) const { + if (isExtended()) + return true; + + if (!isExtendable()) + return false; + + short ExtOpNum = getCExtOpNum(); + int MinValue = getMinValue(); + int MaxValue = getMaxValue(); + const MCOperand& MO = getOperand(ExtOpNum); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. 
+ // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isExpr()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int ImmValue = MO.getImm(); + return (ImmValue < MinValue || ImmValue > MaxValue); +} + +// Return whether the instruction must be always extended. +bool HexagonMCInst::isExtended(void) const { + const uint64_t F = MCID->TSFlags; + return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; +} + +// Return true if the instruction may be extended based on the operand value. +bool HexagonMCInst::isExtendable(void) const { + const uint64_t F = MCID->TSFlags; + return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +} + +// Return number of bits in the constant extended operand. +unsigned HexagonMCInst::getBitCount(void) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInst::getCExtOpNum(void) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +// Return whether the operand can be constant extended. +bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + +// Return the min value that a constant extendable operand can have +// without being extended. +int HexagonMCInst::getMinValue(void) const { + const uint64_t F = MCID->TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1 << (bits - 1); + else + return 0; +} + +// Return the max value that a constant extendable operand can have +// without being extended. +int HexagonMCInst::getMaxValue(void) const { + const uint64_t F = MCID->TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1 << (bits - 1)); + else + return ~(-1 << bits); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h new file mode 100644 index 0000000..3ca71f0 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h @@ -0,0 +1,100 @@ +//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInst to allow some VLIW annotations. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCINST_H +#define HEXAGONMCINST_H + +#include "HexagonTargetMachine.h" +#include "llvm/MC/MCInst.h" + +namespace llvm { + class MCOperand; + + class HexagonMCInst: public MCInst { + // MCID is set during instruction lowering. 
+ // It is needed in order to access TSFlags for + // use in checking MC instruction properties. + const MCInstrDesc *MCID; + + // Packet start and end markers + unsigned packetStart: 1, packetEnd: 1; + + public: + explicit HexagonMCInst(): + MCInst(), MCID(0), packetStart(0), packetEnd(0) {}; + HexagonMCInst(const MCInstrDesc& mcid): + MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {}; + + bool isPacketStart() const { return (packetStart); }; + bool isPacketEnd() const { return (packetEnd); }; + void setPacketStart(bool Y) { packetStart = Y; }; + void setPacketEnd(bool Y) { packetEnd = Y; }; + void resetPacket() { setPacketStart(false); setPacketEnd(false); }; + + // Return the slots used by the insn. + unsigned getUnits(const HexagonTargetMachine* TM) const; + + // Return the Hexagon ISA class for the insn. + unsigned getType() const; + + void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; }; + const MCInstrDesc& getDesc(void) const { return *MCID; }; + + // Return whether the insn is an actual insn. + bool isCanon() const; + + // Return whether the insn is a prefix. + bool isPrefix() const; + + // Return whether the insn is solo, i.e., cannot be in a packet. + bool isSolo() const; + + // Return whether the instruction needs to be constant extended. + bool isConstExtended() const; + + // Return constant extended operand number. + unsigned short getCExtOpNum(void) const; + + // Return whether the insn is a new-value consumer. + bool isNewValue() const; + + // Return whether the instruction is a legal new-value producer. + bool hasNewValue() const; + + // Return the operand that consumes or produces a new value. + const MCOperand& getNewValue() const; + + // Return number of bits in the constant extended operand. + unsigned getBitCount(void) const; + + private: + // Return whether the instruction must be always extended. + bool isExtended() const; + + // Return true if the insn may be extended based on the operand value. + bool isExtendable() const; + + // Return true if the operand can be constant extended. + bool isOperandExtended(const unsigned short OperandNum) const; + + // Return the min value that a constant extendable operand can have + // without being extended. + int getMinValue() const; + + // Return the max value that a constant extendable operand can have + // without being extended. + int getMaxValue() const; + }; +} + +#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 737789b..6b1d2d1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -13,11 +13,13 @@ #include "HexagonMCTargetDesc.h" #include "HexagonMCAsmInfo.h" +#include "InstPrinter/HexagonInstPrinter.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" -- cgit v1.1 From 49deebb5ebcde502fa7908362a5c000e7adbb359 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 20 Feb 2013 17:31:41 +0000 Subject: Fix bug 14779 for passing anonymous aggregates [patch by Kai Nacke]. The PPC backend doesn't handle these correctly. This patch uses logic similar to that in the X86 and ARM backends to track these arguments properly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175635 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f3ef38a..338d73f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2163,13 +2163,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + unsigned CurArgIdx = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[ArgNo].OrigArgIndex; unsigned CurArgOffset = ArgOffset; @@ -2504,6 +2507,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; + // FIXME: FuncArg and Ins[ArgNo] must reference the same argument. + // When passing anonymous aggregates, this is currently not true. + // See LowerFormalArguments_64SVR4 for a fix. Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; -- cgit v1.1 From 9b5b8b0b9439da2dd4167ece15db7e13d37f461c Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 20 Feb 2013 18:03:44 +0000 Subject: [ms-inline asm] Make the comment a bit more verbose. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175641 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index fbe6331..3b9144d 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -389,8 +389,12 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; // When we need stack realignment and there are dynamic allocas, we can't - // reference off of the stack pointer, so we reserve a base pointer. This - // is also true if the function contain MS-style inline assembly. + // reference off of the stack pointer, so we reserve a base pointer. + // + // This is also true if the function contain MS-style inline assembly. We + // do this because if any stack changes occur in the inline assembly, e.g., + // "pusha", then any C local variable or C argument references in the + // inline assembly will be wrong because the SP is not properly tracked. if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) || MF.hasMSInlineAsm()) return true; -- cgit v1.1 From eedff3547de6428798d0bd62c40fba3f93820922 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 20 Feb 2013 18:04:21 +0000 Subject: Fix PR15267 - When extloading from a vector with non-byte-addressable elements, e.g. <4 x i1>, the current logic breaks. Extend the current logic to fix the case where the element type is not byte-addressable by loading all bytes, bit-extracting/packing each element. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175642 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 133 ++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5d547ed..7b28e69 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -363,30 +363,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { EVT SrcVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = LD->getExtensionType(); - SmallVector LoadVals; + SmallVector Vals; SmallVector LoadChains; unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - for (unsigned Idx=0; IdxgetValueType(0).getScalarType(), - Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + EVT SrcEltVT = SrcVT.getScalarType(); + EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); + + if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + // When elements in a vector is not byte-addressable, we cannot directly + // load each element by advancing pointer, which could only address bytes. + // Instead, we load all significant words, mask bits off, and concatenate + // them to form each element. Finally, they are extended to destination + // scalar type to build the destination vector. + EVT WideVT = TLI.getPointerTy(); + + assert(WideVT.isRound() && + "Could not handle the sophisticated case when the widest integer is" + " not power of 2."); + assert(WideVT.bitsGE(SrcEltVT) && + "Type is not legalized?"); + + unsigned WideBytes = WideVT.getStoreSize(); + unsigned Offset = 0; + unsigned RemainingBytes = SrcVT.getStoreSize(); + SmallVector LoadVals; + + while (RemainingBytes > 0) { + SDValue ScalarLoad; + unsigned LoadBytes = WideBytes; + + if (RemainingBytes >= LoadBytes) { + ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + } else { + EVT LoadVT = WideVT; + while (RemainingBytes < LoadBytes) { + LoadBytes >>= 1; // Reduce the load size by half. + LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); + } + ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LoadVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + RemainingBytes -= LoadBytes; + Offset += LoadBytes; + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(LoadBytes)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + // Extract bits, pack and extend/trunc them into destination type. 
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); + + unsigned BitOffset = 0; + unsigned WideIdx = 0; + unsigned WideBits = WideVT.getSizeInBits(); + + for (unsigned Idx = 0; Idx != NumElem; ++Idx) { + SDValue Lo, Hi, ShAmt; - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); + if (BitOffset < WideBits) { + ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); + Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); + } + + BitOffset += SrcEltBits; + if (BitOffset >= WideBits) { + WideIdx++; + Offset -= WideBits; + if (Offset > 0) { + ShAmt = DAG.getConstant(SrcEltBits - Offset, + TLI.getShiftAmountTy(WideVT)); + Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); + } + } + + if (Hi.getNode()) + Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); + + switch (ExtType) { + default: llvm_unreachable("Unknown extended-load op!"); + case ISD::EXTLOAD: + Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::ZEXTLOAD: + Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::SEXTLOAD: + ShAmt = DAG.getConstant(WideBits - SrcEltBits, + TLI.getShiftAmountTy(WideVT)); + Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); + Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); + Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); + break; + } + Vals.push_back(Lo); + } + } else { + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; IdxgetValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Vals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } } SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LoadChains[0], LoadChains.size()); SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + Op.getNode()->getValueType(0), &Vals[0], Vals.size()); AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); -- cgit v1.1 From 7b170500dcfce130c1e5af1c9150014e69e56819 Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Wed, 20 Feb 2013 18:09:21 +0000 Subject: Adding support for absolute relocations. This occurs in ELF files when a relocation is given with no name and an undefined section. The relocation is applied with an address of zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175643 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index c5b807b..64c9792 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -432,14 +432,21 @@ void RuntimeDyldImpl::resolveExternalSymbols() { RelocationList &Relocs = i->second; SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name); if (Loc == GlobalSymbolTable.end()) { - // This is an external symbol, try to get it address from - // MemoryManager. 
- uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), + if (Name.size() == 0) { + // This is an absolute symbol, use an address of zero. + DEBUG(dbgs() << "Resolving absolute relocations." << "\n"); + resolveRelocationList(Relocs, 0); + } + else { + // This is an external symbol, try to get it address from + // MemoryManager. + uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), true); - DEBUG(dbgs() << "Resolving relocations Name: " << Name - << "\t" << format("%p", Addr) - << "\n"); - resolveRelocationList(Relocs, (uintptr_t)Addr); + DEBUG(dbgs() << "Resolving relocations Name: " << Name + << "\t" << format("%p", Addr) + << "\n"); + resolveRelocationList(Relocs, (uintptr_t)Addr); + } } else { report_fatal_error("Expected external symbol"); } -- cgit v1.1 From 1a41f32546019340f27a6f3854f3a73163a25dfe Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 20 Feb 2013 18:18:12 +0000 Subject: Add a LiveRangeUpdater class. Adding new segments to large LiveIntervals can be expensive because the LiveRange objects after the insertion point may need to be moved left or right. This can cause quadratic behavior when adding a large number of segments to a live range. The LiveRangeUpdater class allows the LiveInterval to be in a temporary invalid state while segments are being added. It maintains an internal gap in the LiveInterval when it is shrinking, and it has a spill area for new segments when the LiveInterval is growing. The behavior is similar to the existing mergeIntervalRanges() function, except it allocates less memory for the spill area, and the algorithm is turned inside out so the loop is driven by the clients. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175644 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveInterval.cpp | 200 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 74793be..a797848 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -785,6 +785,206 @@ void LiveRange::print(raw_ostream &os) const { os << *this; } +//===----------------------------------------------------------------------===// +// LiveRangeUpdater class +//===----------------------------------------------------------------------===// +// +// The LiveRangeUpdater class always maintains these invariants: +// +// - When LastStart is invalid, Spills is empty and the iterators are invalid. +// This is the initial state, and the state created by flush(). +// In this state, isDirty() returns false. +// +// Otherwise, segments are kept in three separate areas: +// +// 1. [begin; WriteI) at the front of LI. +// 2. [ReadI; end) at the back of LI. +// 3. Spills. +// +// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - Segments in all three areas are fully ordered and coalesced. +// - Segments in area 1 precede and can't coalesce with segments in area 2. +// - Segments in Spills precede and can't coalesce with segments in area 2. +// - No coalescing is possible between segments in Spills and segments in area +// 1, and there are no overlapping segments. +// +// The segments in Spills are not ordered with respect to the segments in area +// 1. They need to be merged. +// +// When they exist, Spills.back().start <= LastStart, +// and WriteI[-1].start <= LastStart. 
+ +void LiveRangeUpdater::print(raw_ostream &OS) const { + if (!isDirty()) { + if (LI) + OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + else + OS << "Null updater.\n"; + return; + } + assert(LI && "Can't have null LI in dirty updater."); + OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + << ", last start = " << LastStart + << ":\n Area 1:"; + for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + OS << ' ' << *I; + OS << "\n Spills:"; + for (unsigned I = 0, E = Spills.size(); I != E; ++I) + OS << ' ' << Spills[I]; + OS << "\n Area 2:"; + for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + OS << ' ' << *I; + OS << '\n'; +} + +void LiveRangeUpdater::dump() const +{ + print(errs()); +} + +// Determine if A and B should be coalesced. +static inline bool coalescable(const LiveRange &A, const LiveRange &B) { + assert(A.start <= B.start && "Unordered live ranges."); + if (A.end == B.start) + return A.valno == B.valno; + if (A.end < B.start) + return false; + assert(A.valno == B.valno && "Cannot overlap different values"); + return true; +} + +void LiveRangeUpdater::add(LiveRange Seg) { + assert(LI && "Cannot add to a null destination"); + + // Flush the state if Start moves backwards. + if (!LastStart.isValid() || LastStart > Seg.start) { + if (isDirty()) + flush(); + // This brings us to an uninitialized state. Reinitialize. + assert(Spills.empty() && "Leftover spilled segments"); + WriteI = ReadI = LI->begin(); + } + + // Remember start for next time. + LastStart = Seg.start; + + // Advance ReadI until it ends after Seg.start. + LiveInterval::iterator E = LI->end(); + if (ReadI != E && ReadI->end <= Seg.start) { + // First try to close the gap between WriteI and ReadI with spills. + if (ReadI != WriteI) + mergeSpills(); + // Then advance ReadI. + if (ReadI == WriteI) + ReadI = WriteI = LI->find(Seg.start); + else + while (ReadI != E && ReadI->end <= Seg.start) + *WriteI++ = *ReadI++; + } + + assert(ReadI == E || ReadI->end > Seg.start); + + // Check if the ReadI segment begins early. + if (ReadI != E && ReadI->start <= Seg.start) { + assert(ReadI->valno == Seg.valno && "Cannot overlap different values"); + // Bail if Seg is completely contained in ReadI. + if (ReadI->end >= Seg.end) + return; + // Coalesce into Seg. + Seg.start = ReadI->start; + ++ReadI; + } + + // Coalesce as much as possible from ReadI into Seg. + while (ReadI != E && coalescable(Seg, *ReadI)) { + Seg.end = std::max(Seg.end, ReadI->end); + ++ReadI; + } + + // Try coalescing Spills.back() into Seg. + if (!Spills.empty() && coalescable(Spills.back(), Seg)) { + Seg.start = Spills.back().start; + Seg.end = std::max(Spills.back().end, Seg.end); + Spills.pop_back(); + } + + // Try coalescing Seg into WriteI[-1]. + if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); + return; + } + + // Seg doesn't coalesce with anything, and needs to be inserted somewhere. + if (WriteI != ReadI) { + *WriteI++ = Seg; + return; + } + + // Finally, append to LI or Spills. + if (WriteI == E) { + LI->ranges.push_back(Seg); + WriteI = ReadI = LI->ranges.end(); + } else + Spills.push_back(Seg); +} + +// Merge as many spilled segments as possible into the gap between WriteI +// and ReadI. Advance WriteI to reflect the inserted instructions. +void LiveRangeUpdater::mergeSpills() { + // Perform a backwards merge of Spills and [SpillI;WriteI). 
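+  // Worked example (illustrative): with a gap of two slots between WriteI
+  // and ReadI and a single segment in Spills, NumMoved = min(1, 2) = 1, so
+  // Dst = Src + 1 and WriteI advances by one. The backwards loop below then
+  // copies whichever of Src[-1] and SpillSrc[-1] has the larger start into
+  // *--Dst until Src == Dst, keeping all three areas fully ordered.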
+ size_t GapSize = ReadI - WriteI; + size_t NumMoved = std::min(Spills.size(), GapSize); + LiveInterval::iterator Src = WriteI; + LiveInterval::iterator Dst = Src + NumMoved; + LiveInterval::iterator SpillSrc = Spills.end(); + LiveInterval::iterator B = LI->begin(); + + // This is the new WriteI position after merging spills. + WriteI = Dst; + + // Now merge Src and Spills backwards. + while (Src != Dst) { + if (Src != B && Src[-1].start > SpillSrc[-1].start) + *--Dst = *--Src; + else + *--Dst = *--SpillSrc; + } + assert(NumMoved == size_t(Spills.end() - SpillSrc)); + Spills.erase(SpillSrc, Spills.end()); +} + +void LiveRangeUpdater::flush() { + if (!isDirty()) + return; + // Clear the dirty state. + LastStart = SlotIndex(); + + assert(LI && "Cannot add to a null destination"); + + // Nothing to merge? + if (Spills.empty()) { + LI->ranges.erase(WriteI, ReadI); + LI->verify(); + return; + } + + // Resize the WriteI - ReadI gap to match Spills. + size_t GapSize = ReadI - WriteI; + if (GapSize < Spills.size()) { + // The gap is too small. Make some room. + size_t WritePos = WriteI - LI->begin(); + LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + // This also invalidated ReadI, but it is recomputed below. + WriteI = LI->ranges.begin() + WritePos; + } else { + // Shrink the gap if necessary. + LI->ranges.erase(WriteI + Spills.size(), ReadI); + } + ReadI = WriteI + Spills.size(); + mergeSpills(); + LI->verify(); +} + unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Create initial equivalence classes. EqClass.clear(); -- cgit v1.1 From d983d4c6ed5ef69ca2d2e07350cc346245f35b87 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 20 Feb 2013 18:18:15 +0000 Subject: Use LiveRangeUpdater instead of mergeIntervalRanges. Performance is the same, but LiveRangeUpdater has a more flexible interface. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175645 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveInterval.cpp | 151 ++++--------------------------------------- 1 file changed, 11 insertions(+), 140 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index a797848..54f0da0 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -486,148 +486,14 @@ void LiveInterval::join(LiveInterval &Other, valnos.resize(NumNewVals); // shrinkify // Okay, now insert the RHS live ranges into the LHS. + LiveRangeUpdater Updater(this); unsigned RangeNo = 0; for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) { // Map the valno in the other live range to the current live range. - I->valno = NewVNInfo[OtherAssignments[RangeNo]]; - assert(I->valno && "Adding a dead range?"); + VNInfo *VNI = NewVNInfo[OtherAssignments[RangeNo]]; + assert(VNI && "Adding a dead range?"); + Updater.add(I->start, I->end, VNI); } - mergeIntervalRanges(Other); - - verify(); -} - -/// \brief Helper function for merging in another LiveInterval's ranges. -/// -/// This is a helper routine implementing an efficient merge of another -/// LiveIntervals ranges into the current interval. -/// -/// \param LHSValNo If non-NULL, set as the new value number for every range -/// from RHS which is merged into the LHS. -/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value -/// number maches this value number will be merged into LHS. 
-void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, - VNInfo *LHSValNo, - const VNInfo *RHSValNo) { - if (RHS.empty()) - return; - - // Ensure we're starting with a valid range. Note that we don't verify RHS - // because it may have had its value numbers adjusted in preparation for - // merging. - verify(); - - // The strategy for merging these efficiently is as follows: - // - // 1) Find the beginning of the impacted ranges in the LHS. - // 2) Create a new, merged sub-squence of ranges merging from the position in - // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS - // entries being merged will be copied into this new range. - // 3) Replace the relevant section in LHS with these newly merged ranges. - // 4) Append any remaning ranges from RHS if LHS is exhausted in #2. - // - // We don't follow the typical in-place merge strategy for sorted ranges of - // appending the new ranges to the back and then using std::inplace_merge - // because one step of the merge can both mutate the original elements and - // remove elements from the original. Essentially, because the merge includes - // collapsing overlapping ranges, a more complex approach is required. - - // We do an initial binary search to optimize for a common pattern: a large - // LHS, and a very small RHS. - const_iterator RI = RHS.begin(), RE = RHS.end(); - iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI); - - // Merge into NewRanges until one of the ranges is exhausted. - SmallVector NewRanges; - - // Keep track of where to begin the replacement. - iterator ReplaceI = LI; - - // If there are preceding ranges in the LHS, put the last one into NewRanges - // so we can optionally extend it. Adjust the replacement point accordingly. - if (LI != begin()) { - ReplaceI = llvm::prior(LI); - NewRanges.push_back(*ReplaceI); - } - - // Now loop over the mergable portions of both LHS and RHS, merging into - // NewRanges. - while (LI != LE && RI != RE) { - // Skip incoming ranges with the wrong value. - if (RHSValNo && RI->valno != RHSValNo) { - ++RI; - continue; - } - - // Select the first range. We pick the earliest start point, and then the - // largest range. - LiveRange R = *LI; - if (*RI < R) { - R = *RI; - ++RI; - if (LHSValNo) - R.valno = LHSValNo; - } else { - ++LI; - } - - if (NewRanges.empty()) { - NewRanges.push_back(R); - continue; - } - - LiveRange &LastR = NewRanges.back(); - if (R.valno == LastR.valno) { - // Try to merge this range into the last one. - if (R.start <= LastR.end) { - LastR.end = std::max(LastR.end, R.end); - continue; - } - } else { - // We can't merge ranges across a value number. - assert(R.start >= LastR.end && - "Cannot overlap two LiveRanges with differing ValID's"); - } - - // If all else fails, just append the range. - NewRanges.push_back(R); - } - assert(RI == RE || LI == LE); - - // Check for being able to merge into the trailing sequence of ranges on the LHS. - if (!NewRanges.empty()) - for (; LI != LE && (LI->valno == NewRanges.back().valno && - LI->start <= NewRanges.back().end); - ++LI) - NewRanges.back().end = std::max(NewRanges.back().end, LI->end); - - // Replace the ranges in the LHS with the newly merged ones. It would be - // really nice if there were a move-supporting 'replace' directly in - // SmallVector, but as there is not, we pay the price of copies to avoid - // wasted memory allocations. 
- SmallVectorImpl::iterator NRI = NewRanges.begin(), - NRE = NewRanges.end(); - for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI) - *ReplaceI = *NRI; - if (NRI == NRE) - ranges.erase(ReplaceI, LI); - else - ranges.insert(LI, NRI, NRE); - - // And finally insert any trailing end of RHS (if we have one). - for (; RI != RE; ++RI) { - LiveRange R = *RI; - if (LHSValNo) - R.valno = LHSValNo; - if (!ranges.empty() && - ranges.back().valno == R.valno && R.start <= ranges.back().end) - ranges.back().end = std::max(ranges.back().end, R.end); - else - ranges.push_back(R); - } - - // Ensure we finished with a valid new sequence of ranges. - verify(); } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -636,7 +502,9 @@ void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, /// the overlapping LiveRanges have the specified value number. void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo) { - mergeIntervalRanges(RHS, LHSValNo); + LiveRangeUpdater Updater(this); + for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) + Updater.add(I->start, I->end, LHSValNo); } /// MergeValueInAsValue - Merge all of the live ranges of a specific val# @@ -647,7 +515,10 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, const VNInfo *RHSValNo, VNInfo *LHSValNo) { - mergeIntervalRanges(RHS, LHSValNo, RHSValNo); + LiveRangeUpdater Updater(this); + for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) + if (I->valno == RHSValNo) + Updater.add(I->start, I->end, LHSValNo); } /// MergeValueNumberInto - This method is called when two value nubmers -- cgit v1.1 From 0464565baee4b9a1b660aafb70778db571730edc Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 20 Feb 2013 18:24:30 +0000 Subject: On PowerPC, the cache-flush instructions dcbf and icbi are treated as loads. On FreeBSD, add PROT_READ page protection flag before flushing cache. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Memory.inc | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 40d6b3f..a66e3c7 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -51,7 +51,11 @@ int getPosixProtectionFlags(unsigned Flags) { llvm::sys::Memory::MF_EXEC: return PROT_READ | PROT_WRITE | PROT_EXEC; case llvm::sys::Memory::MF_EXEC: +#if defined(__FreeBSD__) + return PROT_READ | PROT_EXEC; +#else return PROT_EXEC; +#endif default: llvm_unreachable("Illegal memory protection flag specified!"); } -- cgit v1.1 From 29fe150bff0f167e85e1b44efe344bf28cb7fe0f Mon Sep 17 00:00:00 2001 From: Andrew Kaylor Date: Wed, 20 Feb 2013 18:24:34 +0000 Subject: Formatting, grammar git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175647 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 64c9792..409b25f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -436,9 +436,8 @@ void RuntimeDyldImpl::resolveExternalSymbols() { // This is an absolute symbol, use an address of zero. DEBUG(dbgs() << "Resolving absolute relocations." 
<< "\n"); resolveRelocationList(Relocs, 0); - } - else { - // This is an external symbol, try to get it address from + } else { + // This is an external symbol, try to get its address from // MemoryManager. uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), true); -- cgit v1.1 From 09adcf7dcacf991e7d5ddc2cb6e5f751f67fcc4a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 20 Feb 2013 19:15:01 +0000 Subject: SIGQUIT is a "kill" signal, rather than an "int" signal, in this context. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175648 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Signals.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index af9e739..85be888 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -49,7 +49,7 @@ static std::vector > CallBacksToRun; // IntSigs - Signals that may interrupt the program at any time. static const int IntSigs[] = { - SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 + SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 }; static const int *const IntSigsEnd = IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]); @@ -57,7 +57,7 @@ static const int *const IntSigsEnd = // KillSigs - Signals that are synchronous with the program that will cause it // to die. static const int KillSigs[] = { - SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT #ifdef SIGSYS , SIGSYS #endif -- cgit v1.1 From 033ee0f111b572ad4f167676e3c4bb4b71fe2da8 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 20 Feb 2013 19:25:09 +0000 Subject: Add comment in Memory.inc explaining r175646. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175650 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Memory.inc | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index a66e3c7..e00394e 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -52,6 +52,13 @@ int getPosixProtectionFlags(unsigned Flags) { return PROT_READ | PROT_WRITE | PROT_EXEC; case llvm::sys::Memory::MF_EXEC: #if defined(__FreeBSD__) + // On PowerPC, having an executable page that has no read permission + // can have unintended consequences. The function InvalidateInstruction- + // Cache uses instructions dcbf and icbi, both of which are treated by + // the processor as loads. If the page has no read permissions, + // executing these instructions will result in a segmentation fault. + // Somehow, this problem is not present on Linux, but it does happen + // on FreeBSD. return PROT_READ | PROT_EXEC; #else return PROT_EXEC; -- cgit v1.1 From 7afb104ed5ce8328da01bdf6fd927489bbaed4b9 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 20 Feb 2013 19:28:46 +0000 Subject: Rewrite comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Signals.inc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 85be888..66338f1 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -47,15 +47,17 @@ static void (*InterruptFunction)() = 0; static std::vector FilesToRemove; static std::vector > CallBacksToRun; -// IntSigs - Signals that may interrupt the program at any time. 
+// IntSigs - Signals that represent requested termination. There's no bug +// or failure, or if there is, it's not our direct responsibility. For whatever +// reason, our continued execution is no longer desirable. static const int IntSigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 }; static const int *const IntSigsEnd = IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]); -// KillSigs - Signals that are synchronous with the program that will cause it -// to die. +// KillSigs - Signals that represent that we have a bug, and our prompt +// termination has been ordered. static const int KillSigs[] = { SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT #ifdef SIGSYS -- cgit v1.1 From 6aecad1074817fae6453bb78ac0d917954dfe9bf Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 20 Feb 2013 20:15:55 +0000 Subject: Update a comment that looks to have been accidentally deleted many moons ago. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175658 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SCCP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 3e935d8..837ed9b 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1185,7 +1185,7 @@ void SCCPSolver::Solve() { DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from - // bottom to constant + // bottom to constant, or to overdefined. // // Anything on this worklist that is overdefined need not be visited // since all of its users will have already been marked as overdefined -- cgit v1.1 From abc402886e407e21d845cccc15723cffd6e2dc20 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 20 Feb 2013 20:41:42 +0000 Subject: Additional fixes for bug 15155. This handles the cases where the 6-bit splat element is odd, converting to a three-instruction sequence to add or subtract two splats. With this fix, the XFAIL in test/CodeGen/PowerPC/vec_constants.ll is removed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175663 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 59 ++++++++++++++++++++++++++++------ lib/Target/PowerPC/PPCISelLowering.cpp | 35 +++++++------------- lib/Target/PowerPC/PPCISelLowering.h | 5 +-- 3 files changed, 64 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 01d731a..1453506 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1323,34 +1323,75 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue(Tmp, 0), GA); } case PPCISD::VADD_SPLAT: { - // Convert: VADD_SPLAT elt, size - // Into: tmp = VSPLTIS[BHW] elt - // VADDU[BHW]M tmp, tmp - // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + // This expands into one of three sequences, depending on whether + // the first operand is odd or even, positive or negative. 
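+    // The three sequences below rest on simple splat arithmetic (a sketch of
+    // the identities only): for even Elt, Elt = Elt/2 + Elt/2; for odd Elt
+    // in [17,31], Elt = (Elt-16) - (-16); for odd Elt in [-31,-17],
+    // Elt = (Elt+16) + (-16). Every immediate involved stays inside the
+    // signed 5-bit range [-16,15] accepted by VSPLTIS[BHW].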
assert(isa(N->getOperand(0)) && isa(N->getOperand(1)) && "Invalid operand on VADD_SPLAT!"); + + int Elt = N->getConstantOperandVal(0); int EltSize = N->getConstantOperandVal(1); - unsigned Opc1, Opc2; + unsigned Opc1, Opc2, Opc3; EVT VT; + if (EltSize == 1) { Opc1 = PPC::VSPLTISB; Opc2 = PPC::VADDUBM; + Opc3 = PPC::VSUBUBM; VT = MVT::v16i8; } else if (EltSize == 2) { Opc1 = PPC::VSPLTISH; Opc2 = PPC::VADDUHM; + Opc3 = PPC::VSUBUHM; VT = MVT::v8i16; } else { assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); Opc1 = PPC::VSPLTISW; Opc2 = PPC::VADDUWM; + Opc3 = PPC::VSUBUWM; VT = MVT::v4i32; } - SDValue Elt = getI32Imm(N->getConstantOperandVal(0)); - SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, Elt); - SDValue TmpVal = SDValue(Tmp, 0); - return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + + if ((Elt & 1) == 0) { + // Elt is even, in the range [-32,-18] + [16,30]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp = VSPLTIS[BHW] elt + // VADDU[BHW]M tmp, tmp + // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + SDValue EltVal = getI32Imm(Elt >> 1); + SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + SDValue TmpVal = SDValue(Tmp, 0); + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + + } else if (Elt > 0) { + // Elt is odd and positive, in the range [17,31]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt-16 + // tmp2 = VSPLTIS[BHW] -16 + // VSUBU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt - 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + + } else { + // Elt is odd and negative, in the range [-31,-17]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt+16 + // tmp2 = VSPLTIS[BHW] -16 + // VADDU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt + 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + } } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 338d73f..6d2aacd 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5025,11 +5025,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: - // tmp = VSPLTI[bhw], result = add tmp, tmp - if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { - // To avoid having the optimization undone by constant folding, we - // convert to a pseudo that will be expanded later. - SDValue Elt = DAG.getConstant(SextVal >> 1, MVT::i32); + // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) + // If this value is in the range [17,31] and is odd, use: + // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) + // If this value is in the range [-31,-17] and is odd, use: + // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) + // Note the last two are three-instruction sequences. + if (SextVal >= -32 && SextVal <= 31) { + // To avoid having these optimizations undone by constant folding, + // we convert to a pseudo that will be expanded later into one of + // the above forms. + SDValue Elt = DAG.getConstant(SextVal, MVT::i32); EVT VT = Op.getValueType(); int Size = VT == MVT::v16i8 ? 
1 : (VT == MVT::v8i16 ? 2 : 4); SDValue EltSize = DAG.getConstant(Size, MVT::i32); @@ -5129,25 +5135,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } - // Three instruction sequences. - - // Odd, in range [17,31]: (vsplti C)-(vsplti -16). - // FIXME: Disabled because the add gets constant folded. - if (0 && SextVal >= 0 && SextVal <= 31) { - SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). - // FIXME: Disabled because the add gets constant folded. - if (0 && SextVal >= -31 && SextVal <= 0) { - SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - return SDValue(); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 7cc2d1a..1fa88f3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -238,8 +238,9 @@ namespace llvm { ADDI_DTPREL_L, /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded - /// into an ADD of a VSPLTI with itself during instruction selection. - /// Necessary to avoid losing this optimization due to constant folds. + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. VADD_SPLAT, /// STD_32 - This is the STD instruction for use with "32-bit" registers. -- cgit v1.1 From 3450f800aa65c91f0496816ba6061a422a74c1fe Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 20 Feb 2013 21:13:59 +0000 Subject: Update TargetLowering ivars for name policy. http://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly ivars should be camel-case and start with an upper-case letter. A few in TargetLowering were starting with a lower-case letter. No functional change intended. 
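As a sketch of the policy applied to one representative ivar (the full
list is in the diff below):

  maxStoresPerMemset = 8;   // before: lower-case leading letter
  MaxStoresPerMemset = 8;   // after: camel case, upper-case leading letter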
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetLoweringBase.cpp | 10 +++++----- lib/Target/ARM/ARMISelLowering.cpp | 16 ++++++++-------- lib/Target/Hexagon/HexagonISelLowering.cpp | 4 ++-- lib/Target/Mips/MipsISelLowering.cpp | 2 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 6 +++--- lib/Target/PowerPC/PPCISelLowering.cpp | 14 +++++++------- lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++-------- lib/Target/XCore/XCoreISelLowering.cpp | 6 +++--- 8 files changed, 37 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index f2329dc..2a02f6a 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -707,17 +707,17 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize - = maxStoresPerMemmoveOptSize = 4; - benefitFromCodePlacementOpt = false; + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + BenefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; - predictableSelectIsExpensive = false; + PredictableSelectIsExpensive = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bd53334a..ef96e56 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -835,21 +835,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type - maxStoresPerMemset = 8; - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; - maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores - maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemset = 8; + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isLikeA9(); + PredictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2); } diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 857b15f..99a5db3 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1052,8 +1052,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setPrefLoopAlignment(4); // Limits for inline expansion of memcpy/memmove - maxStoresPerMemcpy = 6; - maxStoresPerMemmove = 6; + MaxStoresPerMemcpy = 6; + MaxStoresPerMemmove = 6; // // Library calls for unsupported operations diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 088e669..e0080e6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -520,7 +520,7 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); - maxStoresPerMemcpy = 16; + MaxStoresPerMemcpy = 16; } bool diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 9ba2a1d..5ee747a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -75,9 +75,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather // then generating calls to memset, mempcy or memmove. - maxStoresPerMemset = (unsigned)0xFFFFFFFF; - maxStoresPerMemcpy = (unsigned)0xFFFFFFFF; - maxStoresPerMemmove = (unsigned)0xFFFFFFFF; + MaxStoresPerMemset = (unsigned)0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned)0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6d2aacd..60f6ea0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -500,15 +500,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || Subtarget->getDarwinDirective() == PPC::DIR_E5500) { - maxStoresPerMemset = 32; - maxStoresPerMemsetOptSize = 16; - maxStoresPerMemcpy = 32; - maxStoresPerMemcpyOptSize = 8; - maxStoresPerMemmove = 32; - maxStoresPerMemmoveOptSize = 8; + MaxStoresPerMemset = 32; + MaxStoresPerMemsetOptSize = 16; + MaxStoresPerMemcpy = 32; + MaxStoresPerMemcpyOptSize = 8; + MaxStoresPerMemmove = 32; + MaxStoresPerMemmoveOptSize = 8; setPrefFunctionAlignment(4); - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a227166..2315ac7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1335,17 +1335,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // On Darwin, -Os means optimize for size without hurting performance, // do not reduce the limit. - maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; - maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores - maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 
8 : 4; + MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; + MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(4); // 2^4 bytes. - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; // Predictable cmov don't hurt on atom because it's in-order. - predictableSelectIsExpensive = !Subtarget->isAtom(); + PredictableSelectIsExpensive = !Subtarget->isAtom(); setPrefFunctionAlignment(4); // 2^4 bytes. } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 541dd2f..f8a9125 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -156,9 +156,9 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; - maxStoresPerMemmove = maxStoresPerMemmoveOptSize - = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2; + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize + = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2; // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::STORE); -- cgit v1.1 From 64f3e763cd8e4f32f91ae5b44ac4bd9986afddf2 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 20 Feb 2013 21:31:28 +0000 Subject: R600: Update for name changes from r175667. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index 3480ac8..f65e1f3 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -220,9 +220,9 @@ void AMDGPUTargetLowering::InitAMDILLowering() { setSelectIsExpensive(true); setJumpIsExpensive(true); - maxStoresPerMemcpy = 4096; - maxStoresPerMemmove = 4096; - maxStoresPerMemset = 4096; + MaxStoresPerMemcpy = 4096; + MaxStoresPerMemmove = 4096; + MaxStoresPerMemset = 4096; } -- cgit v1.1 From c46e2df74cf75a33742f57d2b4d6c6fcf73bced9 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 20 Feb 2013 21:33:32 +0000 Subject: DAGCombiner: Fold pointless truncate, bitcast, buildvector series (2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y))) can be folded into a (2xi32) (buildvector i32 a, i32 b). Such a DAG would cause uneccessary vdup instructions followed by vmovn instructions. We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64. For example, in the vectorized version of the code below. double A[N]; double B[N]; void test_double_compare_to_double() { int i; for(i=0;i Opnds; + for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) + Opnds.push_back(BuildVect.getOperand(i)); + + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + Opnds.size()); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. 
// For example "trunc (or (shl x, 8), y)" // -> trunc y -- cgit v1.1 From 0e827ebc783ded58c11aeb0d66bed43e214de2de Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 22:09:57 +0000 Subject: Make repairIntervalsInRange() more robust. There are now no longer any liveness- related failures when running 'make check' without LiveVariables with the verifier enabled. Some of the remaining failures elsewhere may still be fallout from incorrect updating of LiveIntervals or the few missing cases left in the two-address pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175672 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 73 ++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 8177db6..e07922b 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1066,13 +1066,27 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, continue; LiveInterval &LI = getInterval(Reg); - LiveInterval::iterator LII = LI.FindLiveRangeContaining(endIdx); + // FIXME: Should we support undefs that gain defs? + if (!LI.hasAtLeastOneValue()) + continue; + + LiveInterval::iterator LII = LI.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LI.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr *MI = I; SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { const MachineOperand &MO = *OI; @@ -1080,25 +1094,54 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, continue; if (MO.isDef()) { - assert(LII != LI.end() && - "Dead register defs are not yet supported."); - if (!Indexes->getInstructionFromIndex(LII->start)) { - LII->start = instrIdx.getRegSlot(); - LII->valno->def = instrIdx.getRegSlot(); + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LI.begin()) + prevStart = llvm::prior(LII)->start; + + // FIXME: This could be more efficient if there was a removeRange + // method that returned an iterator. 
+ LI.removeRange(*LII, true); + if (prevStart.isValid()) + LII = LI.find(prevStart); + else + LII = LI.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); + LII = LI.addRange(LR); } else if (LII->start != instrIdx.getRegSlot()) { - VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR = LiveRange(instrIdx.getRegSlot(), LII->start, VNI); + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); LII = LI.addRange(LR); } - } else if (MO.isUse()) { - if (LII == LI.end()) - --LII; - assert(LII->start < instrIdx && - "Registers with multiple used live ranges are not yet supported."); - SlotIndex endIdx = LII->end; - if (!endIdx.isBlock() && !Indexes->getInstructionFromIndex(endIdx)) + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. + if (!isEndValid && !LII->end.isBlock()) LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); } } } -- cgit v1.1 From c5b6135fb55362f5c052625043ebf3286f799f86 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 22:10:00 +0000 Subject: Find anchoring end points for repairIntervalsInRange and repairIndexesInRange automatically. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 7 +++++++ lib/CodeGen/SlotIndexes.cpp | 9 +++++++++ lib/CodeGen/TwoAddressInstructionPass.cpp | 17 +++++------------ 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index e07922b..a716e8b 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1038,6 +1038,13 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, ArrayRef OrigRegs) { + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. + while (Begin != MBB->begin() && !Indexes->hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !Indexes->hasIndex(End)) + ++End; + SlotIndex endIdx; if (End == MBB->end()) endIdx = getMBBEndIdx(MBB).getPrevSlot(); diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index b4e562e..f293794 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -146,6 +146,15 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End) { + // FIXME: Is this really necessary? The only caller repairIntervalsForRange() + // does the same thing. + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. 
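+  // Scanning Begin backwards and End forwards brackets the repaired range
+  // with positions whose indexes are still trustworthy (block boundaries or
+  // instructions that kept their old indexes), so the repair can proceed
+  // from known-good anchors on both sides.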
+ while (Begin != MBB->begin() && !hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !hasIndex(End)) + ++End; + bool includeStart = (Begin == MBB->begin()); SlotIndex startIdx; if (includeStart) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 99d3607..43d0655 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1150,15 +1150,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } - MachineBasicBlock::iterator Begin; - MachineBasicBlock::iterator End; SmallVector OrigRegs; if (LIS) { - Begin = MachineBasicBlock::iterator(NewMIs[0]); - if (Begin != MBB->begin()) - --Begin; - End = llvm::next(MachineBasicBlock::iterator(MI)); - for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), MOE = MI.operands_end(); MOI != MOE; ++MOI) { if (MOI->isReg()) @@ -1169,8 +1162,11 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, MI.eraseFromParent(); // Update LiveIntervals. - if (LIS) + if (LIS) { + MachineBasicBlock::iterator Begin(NewMIs[0]); + MachineBasicBlock::iterator End(NewMIs[1]); LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs); + } mi = NewMIs[1]; if (TransformSuccess) @@ -1576,9 +1572,6 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { } // Udpate LiveIntervals. - if (LIS) { - if (MBBI != MBB->begin()) - --MBBI; + if (LIS) LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs); - } } -- cgit v1.1 From 6189288766d9cf2e1cf82c1b41655e33754da83b Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Feb 2013 22:10:02 +0000 Subject: Only use LiveIntervals in TwoAddressInstructionPass, not a mix of Liveintervals and SlotIndexes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 43d0655..45d2a1b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -67,7 +67,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; - SlotIndexes *Indexes; LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -533,8 +532,8 @@ commuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); - if (Indexes) - Indexes->replaceMachineInstrInMaps(MI, NewMI); + if (LIS) + LIS->ReplaceMachineInstrInMaps(MI, NewMI); MBB->insert(mi, NewMI); // Insert the new inst MBB->erase(mi); // Nuke the old inst. @@ -587,8 +586,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; - if (Indexes) - Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (LIS) + LIS->ReplaceMachineInstrInMaps(mi, NewMI); if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. 
If the new instruction doesn't @@ -1378,7 +1377,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); - Indexes = getAnalysisIfAvailable(); LV = getAnalysisIfAvailable(); LIS = getAnalysisIfAvailable(); AA = &getAnalysis(); -- cgit v1.1 From cb2ae3d98e3bb36e5813f8f69b00d39efd026dcd Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 20 Feb 2013 22:21:35 +0000 Subject: MCParser: Update method names per coding guidelines. s/AddDirectiveHandler/addDirectiveHandler/ s/ParseMSInlineAsm/parseMSInlineAsm/ s/ParseIdentifier/parseIdentifier/ s/ParseStringToEndOfStatement/parseStringToEndOfStatement/ s/ParseEscapedString/parseEscapedString/ s/EatToEndOfStatement/eatToEndOfStatement/ s/ParseExpression/parseExpression/ s/ParseParenExpression/parseParenExpression/ s/ParseAbsoluteExpression/parseAbsoluteExpression/ s/CheckForValidSection/checkForValidSection/ http://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 220 +++++++++++----------- lib/MC/MCParser/COFFAsmParser.cpp | 76 ++++---- lib/MC/MCParser/DarwinAsmParser.cpp | 158 ++++++++-------- lib/MC/MCParser/ELFAsmParser.cpp | 82 ++++---- lib/MC/MCParser/MCAsmParser.cpp | 4 +- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 16 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 60 +++--- lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 4 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 38 ++-- lib/Target/X86/AsmParser/X86AsmParser.cpp | 30 +-- 10 files changed, 344 insertions(+), 344 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 9c998ff..6ab49ec 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -140,7 +140,7 @@ private: /// ExtensionDirectiveMap - maps directive names to handler methods in parser /// extensions. Extensions register themselves in this map by calling - /// AddDirectiveHandler. + /// addDirectiveHandler. StringMap ExtensionDirectiveMap; /// MacroMap - Map of currently defined macros. 
@@ -177,7 +177,7 @@ public: virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false); - virtual void AddDirectiveHandler(StringRef Directive, + virtual void addDirectiveHandler(StringRef Directive, ExtensionDirectiveHandler Handler) { ExtensionDirectiveMap[Directive] = Handler; } @@ -210,7 +210,7 @@ public: void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; } bool isParsingInlineAsm() { return ParsingInlineAsm; } - bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, + bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl > &OpDecls, SmallVectorImpl &Constraints, @@ -219,17 +219,17 @@ public: const MCInstPrinter *IP, MCAsmParserSemaCallback &SI); - bool ParseExpression(const MCExpr *&Res); - virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); - virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); - virtual bool ParseAbsoluteExpression(int64_t &Res); + bool parseExpression(const MCExpr *&Res); + virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool parseAbsoluteExpression(int64_t &Res); - /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// parseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. - virtual bool ParseIdentifier(StringRef &Res); - virtual void EatToEndOfStatement(); + virtual bool parseIdentifier(StringRef &Res); + virtual void eatToEndOfStatement(); - virtual void CheckForValidSection(); + virtual void checkForValidSection(); /// } private: @@ -307,7 +307,7 @@ private: /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. - virtual StringRef ParseStringToEndOfStatement(); + virtual StringRef parseStringToEndOfStatement(); /// \brief Parse until the end of a statement or a comma is encountered, /// return the contents from the current token up to the end or comma. @@ -430,7 +430,7 @@ private: bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif - virtual bool ParseEscapedString(std::string &Data); + virtual bool parseEscapedString(std::string &Data); const MCExpr *ApplyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant); @@ -618,7 +618,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // We had an error, validate that one was emitted and recover by skipping to // the next line. assert(HadError && "Parse statement returned an error, but none emitted!"); - EatToEndOfStatement(); + eatToEndOfStatement(); } if (TheCondState.TheCond != StartingCondState.TheCond || @@ -665,15 +665,15 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { return HadError; } -void AsmParser::CheckForValidSection() { +void AsmParser::checkForValidSection() { if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { TokError("expected section directive before assembly directive"); Out.InitToTextSection(); } } -/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. -void AsmParser::EatToEndOfStatement() { +/// eatToEndOfStatement - Throw away the rest of the line for testing purposes. 
+void AsmParser::eatToEndOfStatement() { while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) Lex(); @@ -683,7 +683,7 @@ void AsmParser::EatToEndOfStatement() { Lex(); } -StringRef AsmParser::ParseStringToEndOfStatement() { +StringRef AsmParser::parseStringToEndOfStatement() { const char *Start = getTok().getLoc().getPointer(); while (Lexer.isNot(AsmToken::EndOfStatement) && @@ -712,7 +712,7 @@ StringRef AsmParser::ParseStringToComma() { /// parenexpr ::= expr) /// bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { - if (ParseExpression(Res)) return true; + if (parseExpression(Res)) return true; if (Lexer.isNot(AsmToken::RParen)) return TokError("expected ')' in parentheses expression"); EndLoc = Lexer.getTok().getEndLoc(); @@ -726,7 +726,7 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// bracketexpr ::= expr] /// bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { - if (ParseExpression(Res)) return true; + if (parseExpression(Res)) return true; if (Lexer.isNot(AsmToken::RBrac)) return TokError("expected ']' in brackets expression"); EndLoc = Lexer.getTok().getEndLoc(); @@ -759,7 +759,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { case AsmToken::String: case AsmToken::Identifier: { StringRef Identifier; - if (ParseIdentifier(Identifier)) { + if (parseIdentifier(Identifier)) { if (FirstTokenKind == AsmToken::Dollar) return Error(FirstTokenLoc, "invalid token in expression"); return true; @@ -864,9 +864,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } } -bool AsmParser::ParseExpression(const MCExpr *&Res) { +bool AsmParser::parseExpression(const MCExpr *&Res) { SMLoc EndLoc; - return ParseExpression(Res, EndLoc); + return parseExpression(Res, EndLoc); } const MCExpr * @@ -917,7 +917,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, llvm_unreachable("Invalid expression kind!"); } -/// ParseExpression - Parse an expression and return it. +/// parseExpression - Parse an expression and return it. /// /// expr ::= expr &&,|| expr -> lowest. /// expr ::= expr |,^,&,! expr @@ -927,7 +927,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, /// expr ::= expr *,/,% expr -> highest. /// expr ::= primaryexpr /// -bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { +bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { // Parse the expression. Res = 0; if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) @@ -965,17 +965,17 @@ bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { return false; } -bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { +bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { Res = 0; return ParseParenExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc); } -bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { +bool AsmParser::parseAbsoluteExpression(int64_t &Res) { const MCExpr *Expr; SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Expr)) + if (parseExpression(Expr)) return true; if (!Expr->EvaluateAsAbsolute(Res)) @@ -1134,7 +1134,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // Treat '.' as a valid identifier in this context. 
Lex(); IDVal = "."; - } else if (ParseIdentifier(IDVal)) { + } else if (parseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); IDVal = ""; @@ -1177,7 +1177,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // Ignore the statement if in the middle of inactive conditional // (e.g. ".if 0"). if (TheCondState.Ignore) { - EatToEndOfStatement(); + eatToEndOfStatement(); return false; } @@ -1186,7 +1186,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // See what kind of statement we have. switch (Lexer.getKind()) { case AsmToken::Colon: { - CheckForValidSection(); + checkForValidSection(); // identifier ':' -> Label. Lex(); @@ -1332,7 +1332,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { case DK_ZERO: return ParseDirectiveZero(); case DK_EXTERN: - EatToEndOfStatement(); // .extern is the default, ignore it. + eatToEndOfStatement(); // .extern is the default, ignore it. return false; case DK_GLOBL: case DK_GLOBAL: @@ -1460,7 +1460,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN")) return ParseDirectiveMSAlign(IDLoc, Info); - CheckForValidSection(); + checkForValidSection(); // Canonicalize the opcode to lower case. std::string OpcodeStr = IDVal.lower(); @@ -2002,7 +2002,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, SMLoc EqualLoc = Lexer.getLoc(); const MCExpr *Value; - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; // Note: we don't count b as used in "a = b". This is to allow @@ -2059,10 +2059,10 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, return false; } -/// ParseIdentifier: +/// parseIdentifier: /// ::= identifier /// ::= string -bool AsmParser::ParseIdentifier(StringRef &Res) { +bool AsmParser::parseIdentifier(StringRef &Res) { // The assembler has relaxed rules for accepting identifiers, in particular we // allow things like '.globl $foo', which would normally be separate // tokens. At this level, we have already lexed so we cannot (currently) @@ -2105,7 +2105,7 @@ bool AsmParser::ParseIdentifier(StringRef &Res) { bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) { StringRef Name; - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("expected identifier after '" + Twine(IDVal) + "'"); if (getLexer().isNot(AsmToken::Comma)) @@ -2115,7 +2115,7 @@ bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) { return ParseAssignment(Name, allow_redef, true); } -bool AsmParser::ParseEscapedString(std::string &Data) { +bool AsmParser::parseEscapedString(std::string &Data) { assert(getLexer().is(AsmToken::String) && "Unexpected current token!"); Data = ""; @@ -2177,14 +2177,14 @@ bool AsmParser::ParseEscapedString(std::string &Data) { /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ] bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { if (getLexer().isNot(AsmToken::EndOfStatement)) { - CheckForValidSection(); + checkForValidSection(); for (;;) { if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '" + Twine(IDVal) + "' directive"); std::string Data; - if (ParseEscapedString(Data)) + if (parseEscapedString(Data)) return true; getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE); @@ -2210,12 +2210,12 @@ bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { /// ::= (.byte | .short | ... 
) [ expression (, expression)* ] bool AsmParser::ParseDirectiveValue(unsigned Size) { if (getLexer().isNot(AsmToken::EndOfStatement)) { - CheckForValidSection(); + checkForValidSection(); for (;;) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; // Special case constant expressions to match code generator. @@ -2246,7 +2246,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { /// ::= (.single | .double) [ expression (, expression)* ] bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { if (getLexer().isNot(AsmToken::EndOfStatement)) { - CheckForValidSection(); + checkForValidSection(); for (;;) { // We don't truly support arithmetic on floating point expressions, so we @@ -2303,16 +2303,16 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { /// ParseDirectiveZero /// ::= .zero expression bool AsmParser::ParseDirectiveZero() { - CheckForValidSection(); + checkForValidSection(); int64_t NumBytes; - if (ParseAbsoluteExpression(NumBytes)) + if (parseAbsoluteExpression(NumBytes)) return true; int64_t Val = 0; if (getLexer().is(AsmToken::Comma)) { Lex(); - if (ParseAbsoluteExpression(Val)) + if (parseAbsoluteExpression(Val)) return true; } @@ -2329,10 +2329,10 @@ bool AsmParser::ParseDirectiveZero() { /// ParseDirectiveFill /// ::= .fill expression , expression , expression bool AsmParser::ParseDirectiveFill() { - CheckForValidSection(); + checkForValidSection(); int64_t NumValues; - if (ParseAbsoluteExpression(NumValues)) + if (parseAbsoluteExpression(NumValues)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -2340,7 +2340,7 @@ bool AsmParser::ParseDirectiveFill() { Lex(); int64_t FillSize; - if (ParseAbsoluteExpression(FillSize)) + if (parseAbsoluteExpression(FillSize)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -2348,7 +2348,7 @@ bool AsmParser::ParseDirectiveFill() { Lex(); int64_t FillExpr; - if (ParseAbsoluteExpression(FillExpr)) + if (parseAbsoluteExpression(FillExpr)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -2368,11 +2368,11 @@ bool AsmParser::ParseDirectiveFill() { /// ParseDirectiveOrg /// ::= .org expression [ , expression ] bool AsmParser::ParseDirectiveOrg() { - CheckForValidSection(); + checkForValidSection(); const MCExpr *Offset; SMLoc Loc = getTok().getLoc(); - if (ParseExpression(Offset)) + if (parseExpression(Offset)) return true; // Parse optional fill expression. 
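ParseDirectiveFill above collects three absolute expressions; what a well-formed ".fill NumValues, FillSize, FillExpr" then amounts to is repeating the low FillSize bytes of FillExpr. A hedged sketch of that expansion (assuming little-endian emission; expandFill is an invented name, not an MC API):

    #include <cstdint>
    #include <vector>

    // Expand a ".fill NumValues, FillSize, FillExpr" triple into raw
    // bytes, least significant byte first.
    std::vector<uint8_t> expandFill(int64_t NumValues, int64_t FillSize,
                                    int64_t FillExpr) {
      std::vector<uint8_t> Bytes;
      for (int64_t V = 0; V != NumValues; ++V)
        for (int64_t B = 0; B != FillSize; ++B)
          Bytes.push_back(uint8_t(FillExpr >> (8 * B)));
      return Bytes;
    }
    // expandFill(2, 2, 0x1234) yields { 0x34, 0x12, 0x34, 0x12 }.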
@@ -2382,7 +2382,7 @@ bool AsmParser::ParseDirectiveOrg() { return TokError("unexpected token in '.org' directive"); Lex(); - if (ParseAbsoluteExpression(FillExpr)) + if (parseAbsoluteExpression(FillExpr)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -2403,11 +2403,11 @@ bool AsmParser::ParseDirectiveOrg() { /// ParseDirectiveAlign /// ::= {.align, ...} expression [ , expression [ , expression ]] bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { - CheckForValidSection(); + checkForValidSection(); SMLoc AlignmentLoc = getLexer().getLoc(); int64_t Alignment; - if (ParseAbsoluteExpression(Alignment)) + if (parseAbsoluteExpression(Alignment)) return true; SMLoc MaxBytesLoc; @@ -2424,7 +2424,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { // .align 3,,4 if (getLexer().isNot(AsmToken::Comma)) { HasFillExpr = true; - if (ParseAbsoluteExpression(FillExpr)) + if (parseAbsoluteExpression(FillExpr)) return true; } @@ -2434,7 +2434,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { Lex(); MaxBytesLoc = getLexer().getLoc(); - if (ParseAbsoluteExpression(MaxBytesToFill)) + if (parseAbsoluteExpression(MaxBytesToFill)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -2608,7 +2608,7 @@ bool AsmParser::ParseDirectiveLoc() { StringRef Name; SMLoc Loc = getTok().getLoc(); - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("unexpected token in '.loc' directive"); if (Name == "basic_block") @@ -2620,7 +2620,7 @@ bool AsmParser::ParseDirectiveLoc() { else if (Name == "is_stmt") { Loc = getTok().getLoc(); const MCExpr *Value; - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; // The expression must be the constant 0 or 1. if (const MCConstantExpr *MCE = dyn_cast(Value)) { @@ -2639,7 +2639,7 @@ bool AsmParser::ParseDirectiveLoc() { else if (Name == "isa") { Loc = getTok().getLoc(); const MCExpr *Value; - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; // The expression must be a constant greater or equal to 0. 
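ParseDirectiveAlign above is parameterized by IsPow2 because the directive family has two conventions: a ".p2align"-style operand is an exponent, while a ".balign"-style operand is the byte count itself. A small sketch of the normalization (illustrative range checks, not LLVM's exact diagnostics):

    #include <cstdint>

    // Normalize an alignment operand to a byte count.
    // IsPow2 == true : operand is an exponent, e.g. ".p2align 3" = 8 bytes.
    // IsPow2 == false: operand is the byte count and must itself be a
    //                  power of two, e.g. ".balign 8".
    bool normalizeAlignment(bool IsPow2, int64_t Operand, int64_t &Bytes) {
      if (IsPow2) {
        if (Operand < 0 || Operand > 30)
          return false;                       // exponent out of range
        Bytes = int64_t(1) << Operand;
      } else {
        if (Operand <= 0 || (Operand & (Operand - 1)) != 0)
          return false;                       // not a power of two
        Bytes = Operand;
      }
      return true;
    }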
if (const MCConstantExpr *MCE = dyn_cast(Value)) { @@ -2653,7 +2653,7 @@ bool AsmParser::ParseDirectiveLoc() { } } else if (Name == "discriminator") { - if (ParseAbsoluteExpression(Discriminator)) + if (parseAbsoluteExpression(Discriminator)) return true; } else { @@ -2684,7 +2684,7 @@ bool AsmParser::ParseDirectiveCFISections() { bool EH = false; bool Debug = false; - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("Expected an identifier"); if (Name == ".eh_frame") @@ -2695,7 +2695,7 @@ bool AsmParser::ParseDirectiveCFISections() { if (getLexer().is(AsmToken::Comma)) { Lex(); - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("Expected an identifier"); if (Name == ".eh_frame") @@ -2732,7 +2732,7 @@ bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register, return true; Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true); } else - return ParseAbsoluteExpression(Register); + return parseAbsoluteExpression(Register); return false; } @@ -2749,7 +2749,7 @@ bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { Lex(); int64_t Offset = 0; - if (ParseAbsoluteExpression(Offset)) + if (parseAbsoluteExpression(Offset)) return true; getStreamer().EmitCFIDefCfa(Register, Offset); @@ -2760,7 +2760,7 @@ bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { /// ::= .cfi_def_cfa_offset offset bool AsmParser::ParseDirectiveCFIDefCfaOffset() { int64_t Offset = 0; - if (ParseAbsoluteExpression(Offset)) + if (parseAbsoluteExpression(Offset)) return true; getStreamer().EmitCFIDefCfaOffset(Offset); @@ -2790,7 +2790,7 @@ bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) { /// ::= .cfi_adjust_cfa_offset adjustment bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() { int64_t Adjustment = 0; - if (ParseAbsoluteExpression(Adjustment)) + if (parseAbsoluteExpression(Adjustment)) return true; getStreamer().EmitCFIAdjustCfaOffset(Adjustment); @@ -2821,7 +2821,7 @@ bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) { return TokError("unexpected token in directive"); Lex(); - if (ParseAbsoluteExpression(Offset)) + if (parseAbsoluteExpression(Offset)) return true; getStreamer().EmitCFIOffset(Register, Offset); @@ -2841,7 +2841,7 @@ bool AsmParser::ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc) { Lex(); int64_t Offset = 0; - if (ParseAbsoluteExpression(Offset)) + if (parseAbsoluteExpression(Offset)) return true; getStreamer().EmitCFIRelOffset(Register, Offset); @@ -2876,7 +2876,7 @@ static bool isValidEncoding(int64_t Encoding) { /// ::= .cfi_lsda encoding, [symbol_name] bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { int64_t Encoding = 0; - if (ParseAbsoluteExpression(Encoding)) + if (parseAbsoluteExpression(Encoding)) return true; if (Encoding == dwarf::DW_EH_PE_omit) return false; @@ -2889,7 +2889,7 @@ bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { Lex(); StringRef Name; - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); @@ -2943,7 +2943,7 @@ bool AsmParser::ParseDirectiveCFIRestore(SMLoc DirectiveLoc) { bool AsmParser::ParseDirectiveCFIEscape() { std::string Values; int64_t CurrValue; - if (ParseAbsoluteExpression(CurrValue)) + if (parseAbsoluteExpression(CurrValue)) return true; Values.push_back((uint8_t)CurrValue); @@ -2951,7 +2951,7 @@ bool AsmParser::ParseDirectiveCFIEscape() { while (getLexer().is(AsmToken::Comma)) { Lex(); - if 
(ParseAbsoluteExpression(CurrValue)) + if (parseAbsoluteExpression(CurrValue)) return true; Values.push_back((uint8_t)CurrValue); @@ -3000,7 +3000,7 @@ bool AsmParser::ParseDirectiveMacrosOnOff(StringRef Directive) { /// ::= .macro name [parameters] bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { StringRef Name; - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("expected identifier in '.macro' directive"); MCAsmMacroParameters Parameters; @@ -3010,7 +3010,7 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { MCAsmMacroParameter Parameter; - if (ParseIdentifier(Parameter.first)) + if (parseIdentifier(Parameter.first)) return TokError("expected identifier in '.macro' directive"); if (getLexer().is(AsmToken::Equal)) { @@ -3052,7 +3052,7 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { } // Otherwise, scan til the end of the statement. - EatToEndOfStatement(); + eatToEndOfStatement(); } if (LookupMacro(Name)) { @@ -3193,7 +3193,7 @@ bool AsmParser::ParseDirectiveEndMacro(StringRef Directive) { /// ::= .purgem bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) { StringRef Name; - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("expected identifier in '.purgem' directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -3209,13 +3209,13 @@ bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) { /// ParseDirectiveBundleAlignMode /// ::= {.bundle_align_mode} expression bool AsmParser::ParseDirectiveBundleAlignMode() { - CheckForValidSection(); + checkForValidSection(); // Expect a single argument: an expression that evaluates to a constant // in the inclusive range 0-30. SMLoc ExprLoc = getLexer().getLoc(); int64_t AlignSizePow2; - if (ParseAbsoluteExpression(AlignSizePow2)) + if (parseAbsoluteExpression(AlignSizePow2)) return true; else if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token after expression in" @@ -3235,7 +3235,7 @@ bool AsmParser::ParseDirectiveBundleAlignMode() { /// ParseDirectiveBundleLock /// ::= {.bundle_lock} [align_to_end] bool AsmParser::ParseDirectiveBundleLock() { - CheckForValidSection(); + checkForValidSection(); bool AlignToEnd = false; if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -3244,7 +3244,7 @@ bool AsmParser::ParseDirectiveBundleLock() { const char *kInvalidOptionError = "invalid option for '.bundle_lock' directive"; - if (ParseIdentifier(Option)) + if (parseIdentifier(Option)) return Error(Loc, kInvalidOptionError); if (Option != "align_to_end") @@ -3264,7 +3264,7 @@ bool AsmParser::ParseDirectiveBundleLock() { /// ParseDirectiveBundleLock /// ::= {.bundle_lock} bool AsmParser::ParseDirectiveBundleUnlock() { - CheckForValidSection(); + checkForValidSection(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.bundle_unlock' directive"); @@ -3277,10 +3277,10 @@ bool AsmParser::ParseDirectiveBundleUnlock() { /// ParseDirectiveSpace /// ::= (.skip | .space) expression [ , expression ] bool AsmParser::ParseDirectiveSpace(StringRef IDVal) { - CheckForValidSection(); + checkForValidSection(); int64_t NumBytes; - if (ParseAbsoluteExpression(NumBytes)) + if (parseAbsoluteExpression(NumBytes)) return true; int64_t FillExpr = 0; @@ -3289,7 +3289,7 @@ bool AsmParser::ParseDirectiveSpace(StringRef IDVal) { return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); Lex(); - if (ParseAbsoluteExpression(FillExpr)) + if 
(parseAbsoluteExpression(FillExpr)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -3311,10 +3311,10 @@ bool AsmParser::ParseDirectiveSpace(StringRef IDVal) { /// ParseDirectiveLEB128 /// ::= (.sleb128 | .uleb128) expression bool AsmParser::ParseDirectiveLEB128(bool Signed) { - CheckForValidSection(); + checkForValidSection(); const MCExpr *Value; - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -3336,7 +3336,7 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { StringRef Name; SMLoc Loc = getTok().getLoc(); - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return Error(Loc, "expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); @@ -3363,11 +3363,11 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { /// ParseDirectiveComm /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] bool AsmParser::ParseDirectiveComm(bool IsLocal) { - CheckForValidSection(); + checkForValidSection(); SMLoc IDLoc = getLexer().getLoc(); StringRef Name; - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. @@ -3379,7 +3379,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { int64_t Size; SMLoc SizeLoc = getLexer().getLoc(); - if (ParseAbsoluteExpression(Size)) + if (parseAbsoluteExpression(Size)) return true; int64_t Pow2Alignment = 0; @@ -3387,7 +3387,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { if (getLexer().is(AsmToken::Comma)) { Lex(); Pow2AlignmentLoc = getLexer().getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) + if (parseAbsoluteExpression(Pow2Alignment)) return true; LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType(); @@ -3440,7 +3440,7 @@ bool AsmParser::ParseDirectiveAbort() { // FIXME: Use loc from directive. 
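ParseDirectiveLEB128 above only parses the operand expression; the emission it requests is ordinary LEB128. A self-contained sketch of the unsigned form behind ".uleb128":

    #include <cstdint>
    #include <vector>

    // Unsigned LEB128: seven payload bits per byte, high bit flags
    // continuation.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;                 // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }
    // encodeULEB128(624485) yields { 0xE5, 0x8E, 0x26 }.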
SMLoc Loc = getLexer().getLoc(); - StringRef Str = ParseStringToEndOfStatement(); + StringRef Str = parseStringToEndOfStatement(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.abort' directive"); @@ -3512,10 +3512,10 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { - EatToEndOfStatement(); + eatToEndOfStatement(); } else { int64_t ExprValue; - if (ParseAbsoluteExpression(ExprValue)) + if (parseAbsoluteExpression(ExprValue)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -3537,9 +3537,9 @@ bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { - EatToEndOfStatement(); + eatToEndOfStatement(); } else { - StringRef Str = ParseStringToEndOfStatement(); + StringRef Str = parseStringToEndOfStatement(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.ifb' directive"); @@ -3560,7 +3560,7 @@ bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { - EatToEndOfStatement(); + eatToEndOfStatement(); } else { StringRef Str1 = ParseStringToComma(); @@ -3569,7 +3569,7 @@ bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { Lex(); - StringRef Str2 = ParseStringToEndOfStatement(); + StringRef Str2 = parseStringToEndOfStatement(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.ifc' directive"); @@ -3591,9 +3591,9 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { - EatToEndOfStatement(); + eatToEndOfStatement(); } else { - if (ParseIdentifier(Name)) + if (parseIdentifier(Name)) return TokError("expected identifier after '.ifdef'"); Lex(); @@ -3624,11 +3624,11 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { LastIgnoreState = TheCondStack.back().Ignore; if (LastIgnoreState || TheCondState.CondMet) { TheCondState.Ignore = true; - EatToEndOfStatement(); + eatToEndOfStatement(); } else { int64_t ExprValue; - if (ParseAbsoluteExpression(ExprValue)) + if (parseAbsoluteExpression(ExprValue)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -3822,7 +3822,7 @@ MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { } // Otherwise, scan till the end of the statement. 
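The .if/.elseif/.endif handlers above all maintain one invariant: a statement is skipped when any enclosing conditional is inactive or an earlier arm of the current chain has already matched. A compact sketch of that bookkeeping (types and names invented, not the MC AsmCond API):

    #include <cassert>
    #include <vector>

    struct CondState { bool CondMet; bool Ignore; };

    class CondChain {
      std::vector<CondState> Stack;
      CondState Cur{false, false};
    public:
      void onIf(bool Cond) {
        Stack.push_back(Cur);
        bool OuterIgnore = Cur.Ignore;
        Cur.CondMet = Cond && !OuterIgnore;
        Cur.Ignore = OuterIgnore || !Cond;
      }
      void onElseIf(bool Cond) {
        bool OuterIgnore = Stack.empty() ? false : Stack.back().Ignore;
        if (OuterIgnore || Cur.CondMet) {
          Cur.Ignore = true;            // an earlier arm already ran
        } else {
          Cur.CondMet = Cond;
          Cur.Ignore = !Cond;
        }
      }
      void onEndIf() {
        assert(!Stack.empty() && "unmatched .endif");
        Cur = Stack.back();
        Stack.pop_back();
      }
      bool skipping() const { return Cur.Ignore; }
    };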
- EatToEndOfStatement(); + eatToEndOfStatement(); } const char *BodyStart = StartToken.getLoc().getPointer(); @@ -3858,7 +3858,7 @@ void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { int64_t Count; - if (ParseAbsoluteExpression(Count)) + if (parseAbsoluteExpression(Count)) return TokError("unexpected token in '.rept' directive"); if (Count < 0) @@ -3896,7 +3896,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { MCAsmMacroParameters Parameters; MCAsmMacroParameter Parameter; - if (ParseIdentifier(Parameter.first)) + if (parseIdentifier(Parameter.first)) return TokError("expected identifier in '.irp' directive"); Parameters.push_back(Parameter); @@ -3942,7 +3942,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { MCAsmMacroParameters Parameters; MCAsmMacroParameter Parameter; - if (ParseIdentifier(Parameter.first)) + if (parseIdentifier(Parameter.first)) return TokError("expected identifier in '.irpc' directive"); Parameters.push_back(Parameter); @@ -4006,7 +4006,7 @@ bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t Len) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); if (!MCE) @@ -4022,7 +4022,7 @@ bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); - if (ParseExpression(Value)) + if (parseExpression(Value)) return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); if (!MCE) @@ -4063,7 +4063,7 @@ static int RewritesSort(const void *A, const void *B) { } bool -AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, +AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool> > &OpDecls, SmallVectorImpl<std::string> &Constraints, diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index d0cc0c5..a50eab2 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -25,10 +25,10 @@ namespace { class COFFAsmParser : public MCAsmParserExtension { template<bool (COFFAsmParser::*HandlerMethod)(StringRef, SMLoc)> - void AddDirectiveHandler(StringRef Directive) { + void addDirectiveHandler(StringRef Directive) { MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( this, HandleDirective<COFFAsmParser, HandlerMethod>); - getParser().AddDirectiveHandler(Directive, Handler); + getParser().addDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(StringRef Section, @@ -39,43 +39,43 @@ class COFFAsmParser : public MCAsmParserExtension { // Call the base implementation.
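The addDirectiveHandler template above (the Darwin and ELF parsers below repeat it) uses a member-function pointer as a non-type template argument, so every handler gets a static thunk with one uniform signature that a plain table can store. A self-contained miniature of the pattern, all names invented:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>

    class MiniParser;
    typedef bool (*Thunk)(MiniParser *, const std::string &);
    static std::map<std::string, std::pair<MiniParser *, Thunk> > Table;

    class MiniParser {
    public:
      // One static thunk is stamped out per handler method.
      template <bool (MiniParser::*Method)(const std::string &)>
      static bool handleDirective(MiniParser *Self, const std::string &D) {
        return (Self->*Method)(D);
      }
      template <bool (MiniParser::*Method)(const std::string &)>
      void addDirectiveHandler(const std::string &Directive) {
        Table[Directive] =
            std::make_pair(this, &MiniParser::handleDirective<Method>);
      }
      bool parseTextSection(const std::string &) {
        std::puts("switching to .text");
        return false;                   // false = no error, as in MC
      }
    };

    int main() {
      MiniParser P;
      P.addDirectiveHandler<&MiniParser::parseTextSection>(".text");
      std::pair<MiniParser *, Thunk> H = Table[".text"];
      return H.second(H.first, ".text") ? 1 : 0;
    }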
MCAsmParserExtension::Initialize(Parser); - AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text"); - AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data"); - AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss"); - AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def"); - AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl"); - AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type"); - AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef"); - AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32"); + addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text"); + addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data"); + addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32"); // Win64 EH directives. - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>( ".seh_proc"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>( ".seh_endproc"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>( ".seh_startchained"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>( ".seh_endchained"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>( ".seh_handler"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>( ".seh_handlerdata"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>( ".seh_pushreg"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>( ".seh_setframe"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>( ".seh_stackalloc"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>( ".seh_savereg"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>( ".seh_savexmm"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>( ".seh_pushframe"); - AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>( + addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>( ".seh_endprologue"); - AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); } bool ParseSectionDirectiveText(StringRef, SMLoc) { @@ -141,7 +141,7 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { for (;;) { StringRef Name; - if 
(getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); @@ -177,7 +177,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section, bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) { StringRef SymbolName; - if (getParser().ParseIdentifier(SymbolName)) + if (getParser().parseIdentifier(SymbolName)) return TokError("expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName); @@ -190,7 +190,7 @@ bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) { bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) { int64_t SymbolStorageClass; - if (getParser().ParseAbsoluteExpression(SymbolStorageClass)) + if (getParser().parseAbsoluteExpression(SymbolStorageClass)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -203,7 +203,7 @@ bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) { bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) { int64_t Type; - if (getParser().ParseAbsoluteExpression(Type)) + if (getParser().parseAbsoluteExpression(Type)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -222,7 +222,7 @@ bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) { bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) { StringRef SymbolID; - if (getParser().ParseIdentifier(SymbolID)) + if (getParser().parseIdentifier(SymbolID)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -237,7 +237,7 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) { bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) { StringRef SymbolID; - if (getParser().ParseIdentifier(SymbolID)) + if (getParser().parseIdentifier(SymbolID)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -270,7 +270,7 @@ bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) { bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) { StringRef SymbolID; - if (getParser().ParseIdentifier(SymbolID)) + if (getParser().parseIdentifier(SymbolID)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -323,7 +323,7 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) { Lex(); SMLoc startLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Off)) + if (getParser().parseAbsoluteExpression(Off)) return true; if (Off & 0x0F) @@ -340,7 +340,7 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) { bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) { int64_t Size; SMLoc startLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Size)) + if (getParser().parseAbsoluteExpression(Size)) return true; if (Size & 7) @@ -364,7 +364,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) { Lex(); SMLoc startLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Off)) + if (getParser().parseAbsoluteExpression(Off)) return true; if (Off & 7) @@ -391,7 +391,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) { Lex(); SMLoc startLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Off)) + if (getParser().parseAbsoluteExpression(Off)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -412,7 +412,7 @@ bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) { if (getLexer().is(AsmToken::At)) { SMLoc startLoc = getLexer().getLoc(); Lex(); - if (!getParser().ParseIdentifier(CodeID)) { + if (!getParser().parseIdentifier(CodeID)) { 
if (CodeID != "code") return Error(startLoc, "expected @code"); Code = true; @@ -439,7 +439,7 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) { return TokError("a handler attribute must begin with '@'"); SMLoc startLoc = getLexer().getLoc(); Lex(); - if (getParser().ParseIdentifier(identifier)) + if (getParser().parseIdentifier(identifier)) return Error(startLoc, "expected @unwind or @except"); if (identifier == "unwind") unwind = true; @@ -480,7 +480,7 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { } else { int64_t n; - if (getParser().ParseAbsoluteExpression(n)) + if (getParser().parseAbsoluteExpression(n)) return true; if (n > 15) return Error(startLoc, "register number is too high"); diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 9029c6d..6d6409f 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -27,10 +27,10 @@ namespace { /// Darwin targets. class DarwinAsmParser : public MCAsmParserExtension { template - void AddDirectiveHandler(StringRef Directive) { + void addDirectiveHandler(StringRef Directive) { MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( this, HandleDirective); - getParser().AddDirectiveHandler(Directive, Handler); + getParser().addDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(const char *Segment, const char *Section, @@ -44,122 +44,122 @@ public: // Call the base implementation. this->MCAsmParserExtension::Initialize(Parser); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>( ".subsections_via_symbols"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>( ".pushsection"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>( ".popsection"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( ".secure_log_unique"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( ".secure_log_reset"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill"); - 
AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>( ".data_region"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>( ".end_data_region"); // Special section directives. - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>( ".const_data"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>( ".constructor"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>( ".cstring"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data"); + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>( ".destructor"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld"); + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>( ".fvmlib_init0"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>( ".fvmlib_init1"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>( ".lazy_symbol_pointer"); - AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLinkerOption>( + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLinkerOption>( ".linker_option"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>( ".literal16"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>( ".literal4"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>( ".literal8"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>( ".mod_init_func"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>( ".mod_term_func"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>( ".non_lazy_symbol_pointer"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>( ".objc_cat_cls_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>( ".objc_cat_inst_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>( 
".objc_category"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>( ".objc_class"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>( ".objc_class_names"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>( ".objc_class_vars"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>( ".objc_cls_meth"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>( ".objc_cls_refs"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>( ".objc_inst_meth"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>( ".objc_instance_vars"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>( ".objc_message_refs"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>( ".objc_meta_class"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>( ".objc_meth_var_names"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>( ".objc_meth_var_types"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>( ".objc_module_info"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>( ".objc_protocol"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>( ".objc_selector_strs"); - AddDirectiveHandler< + addDirectiveHandler< &DarwinAsmParser::ParseSectionDirectiveObjCStringObject>( ".objc_string_object"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>( ".objc_symbols"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>( ".picsymbol_stub"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>( ".static_const"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>( ".static_data"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>( ".symbol_stub"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>( + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata"); + 
addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>( ".thread_init_func"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); - AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident"); + addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident"); } bool ParseDirectiveDesc(StringRef, SMLoc); @@ -345,7 +345,7 @@ public: } bool ParseSectionDirectiveIdent(StringRef, SMLoc) { // Darwin silently ignores the .ident directive. - getParser().EatToEndOfStatement(); + getParser().eatToEndOfStatement(); return false; } bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { @@ -390,7 +390,7 @@ bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, /// ::= .desc identifier , expression bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. @@ -401,7 +401,7 @@ bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { Lex(); int64_t DescValue; - if (getParser().ParseAbsoluteExpression(DescValue)) + if (getParser().parseAbsoluteExpression(DescValue)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -447,7 +447,7 @@ bool DarwinAsmParser::ParseDirectiveLinkerOption(StringRef IDVal, SMLoc) { return TokError("expected string in '" + Twine(IDVal) + "' directive"); std::string Data; - if (getParser().ParseEscapedString(Data)) + if (getParser().parseEscapedString(Data)) return true; Args.push_back(Data); @@ -469,7 +469,7 @@ bool DarwinAsmParser::ParseDirectiveLinkerOption(StringRef IDVal, SMLoc) { /// ::= .lsym identifier , expression bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. @@ -480,7 +480,7 @@ bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { Lex(); const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -501,7 +501,7 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { SMLoc Loc = getLexer().getLoc(); StringRef SectionName; - if (getParser().ParseIdentifier(SectionName)) + if (getParser().parseIdentifier(SectionName)) return Error(Loc, "expected identifier after '.section' directive"); // Verify there is a following comma. @@ -576,7 +576,7 @@ bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { /// ParseDirectiveSecureLogUnique /// ::= .secure_log_unique ... message ... 
bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { - StringRef LogMessage = getParser().ParseStringToEndOfStatement(); + StringRef LogMessage = getParser().parseStringToEndOfStatement(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.secure_log_unique' directive"); @@ -644,7 +644,7 @@ bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { SMLoc IDLoc = getLexer().getLoc(); StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. @@ -656,7 +656,7 @@ bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { int64_t Size; SMLoc SizeLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Size)) + if (getParser().parseAbsoluteExpression(Size)) return true; int64_t Pow2Alignment = 0; @@ -664,7 +664,7 @@ bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { if (getLexer().is(AsmToken::Comma)) { Lex(); Pow2AlignmentLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + if (getParser().parseAbsoluteExpression(Pow2Alignment)) return true; } @@ -699,7 +699,7 @@ bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { /// , align_expression ]] bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { StringRef Segment; - if (getParser().ParseIdentifier(Segment)) + if (getParser().parseIdentifier(Segment)) return TokError("expected segment name after '.zerofill' directive"); if (getLexer().isNot(AsmToken::Comma)) @@ -707,7 +707,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { Lex(); StringRef Section; - if (getParser().ParseIdentifier(Section)) + if (getParser().parseIdentifier(Section)) return TokError("expected section name after comma in '.zerofill' " "directive"); @@ -727,7 +727,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { SMLoc IDLoc = getLexer().getLoc(); StringRef IDStr; - if (getParser().ParseIdentifier(IDStr)) + if (getParser().parseIdentifier(IDStr)) return TokError("expected identifier in directive"); // handle the identifier as the key symbol. 
@@ -739,7 +739,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { int64_t Size; SMLoc SizeLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Size)) + if (getParser().parseAbsoluteExpression(Size)) return true; int64_t Pow2Alignment = 0; @@ -747,7 +747,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { if (getLexer().is(AsmToken::Comma)) { Lex(); Pow2AlignmentLoc = getLexer().getLoc(); - if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + if (getParser().parseAbsoluteExpression(Pow2Alignment)) return true; } @@ -791,7 +791,7 @@ bool DarwinAsmParser::ParseDirectiveDataRegion(StringRef, SMLoc) { } StringRef RegionType; SMLoc Loc = getParser().getTok().getLoc(); - if (getParser().ParseIdentifier(RegionType)) + if (getParser().parseIdentifier(RegionType)) return TokError("expected region type after '.data_region' directive"); int Kind = StringSwitch(RegionType) .Case("jt8", MCDR_DataRegionJT8) diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 7921abb..4c45e08 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -23,11 +23,11 @@ namespace { class ELFAsmParser : public MCAsmParserExtension { template - void AddDirectiveHandler(StringRef Directive) { + void addDirectiveHandler(StringRef Directive) { MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( this, HandleDirective); - getParser().AddDirectiveHandler(Directive, Handler); + getParser().addDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(StringRef Section, unsigned Type, @@ -43,38 +43,38 @@ public: // Call the base implementation. this->MCAsmParserExtension::Initialize(Parser); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata"); - AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss"); - AddDirectiveHandler< + addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data"); + addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text"); + addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss"); + addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata"); + addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata"); + addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss"); + addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); - AddDirectiveHandler< + addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); - AddDirectiveHandler< + addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); - AddDirectiveHandler< + addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); - AddDirectiveHandler< + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); + addDirectiveHandler< &ELFAsmParser::ParseDirectivePushSection>(".pushsection"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous"); - 
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); - AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local"); - AddDirectiveHandler< + addDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size"); + addDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local"); + addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".protected"); - AddDirectiveHandler< + addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal"); - AddDirectiveHandler< + addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); } @@ -169,7 +169,7 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { for (;;) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); @@ -203,7 +203,7 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); @@ -212,7 +212,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { Lex(); const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -224,7 +224,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { bool ELFAsmParser::ParseSectionName(StringRef &SectionName) { // A section name can contain -, so we cannot just use - // ParseIdentifier. + // parseIdentifier. 
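The comment above is the reason ParseSectionName exists: a name such as .note.GNU-stack lexes as several tokens, so the parser re-joins raw characters instead of reusing parseIdentifier. A toy equivalent over a plain string cursor, purely illustrative:

    #include <cctype>
    #include <string>

    // Take everything up to a comma or whitespace as the section name,
    // so '-' and '.' survive intact.
    std::string scanSectionName(const char *&Cursor) {
      std::string Name;
      while (*Cursor && *Cursor != ',' &&
             !isspace((unsigned char)*Cursor))
        Name += *Cursor++;
      return Name;
    }
    // Given ".note.GNU-stack,\"\"", this returns ".note.GNU-stack".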
SMLoc FirstLoc = getLexer().getLoc(); unsigned Size = 0; @@ -377,14 +377,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { return TokError("expected '@' or '%' before type"); Lex(); - if (getParser().ParseIdentifier(TypeName)) + if (getParser().parseIdentifier(TypeName)) return TokError("expected identifier in directive"); if (Mergeable) { if (getLexer().isNot(AsmToken::Comma)) return TokError("expected the entry size"); Lex(); - if (getParser().ParseAbsoluteExpression(Size)) + if (getParser().parseAbsoluteExpression(Size)) return true; if (Size <= 0) return TokError("entry size must be positive"); @@ -394,12 +394,12 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::Comma)) return TokError("expected group name"); Lex(); - if (getParser().ParseIdentifier(GroupName)) + if (getParser().parseIdentifier(GroupName)) return true; if (getLexer().is(AsmToken::Comma)) { Lex(); StringRef Linkage; - if (getParser().ParseIdentifier(Linkage)) + if (getParser().parseIdentifier(Linkage)) return true; if (Linkage != "comdat") return TokError("Linkage must be 'comdat'"); @@ -461,7 +461,7 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { /// ::= .type identifier , @attribute bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. @@ -479,7 +479,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { SMLoc TypeLoc; TypeLoc = getLexer().getLoc(); - if (getParser().ParseIdentifier(Type)) + if (getParser().parseIdentifier(Type)) return TokError("expected symbol type in directive"); MCSymbolAttr Attr = StringSwitch(Type) @@ -538,7 +538,7 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { /// ::= .symver foo, bar2@zed bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::Comma)) @@ -547,7 +547,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) { Lex(); StringRef AliasName; - if (getParser().ParseIdentifier(AliasName)) + if (getParser().parseIdentifier(AliasName)) return TokError("expected identifier in directive"); if (AliasName.find('@') == StringRef::npos) @@ -593,7 +593,7 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { // FIXME: Share code with the other alias building directives. 
StringRef AliasName; - if (getParser().ParseIdentifier(AliasName)) + if (getParser().parseIdentifier(AliasName)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::Comma)) @@ -602,7 +602,7 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { Lex(); StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName); diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index a8b00cd..6e1ebad 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -38,9 +38,9 @@ bool MCAsmParser::TokError(const Twine &Msg, ArrayRef Ranges) { return true; } -bool MCAsmParser::ParseExpression(const MCExpr *&Res) { +bool MCAsmParser::parseExpression(const MCExpr *&Res) { SMLoc L; - return ParseExpression(Res, L); + return parseExpression(Res, L); } void MCParsedAsmOperand::dump() const { diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f871ecf..c1695da 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1155,7 +1155,7 @@ AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { return ResTy; const MCExpr *SubExprVal; - if (getParser().ParseExpression(SubExprVal)) + if (getParser().parseExpression(SubExprVal)) return MatchOperand_ParseFail; ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); @@ -1163,7 +1163,7 @@ AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { } // No weird AArch64MCExpr prefix - return getParser().ParseExpression(ExprVal) + return getParser().parseExpression(ExprVal) ? MatchOperand_ParseFail : MatchOperand_Success; } @@ -1823,7 +1823,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, if (Code == A64CC::Invalid) { Error(S, "invalid condition code"); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -1838,7 +1838,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. if (ParseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -1847,7 +1847,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, // Parse and remember the operand. 
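The instruction parsers in this hunk (AArch64 here, ARM below) share one loop shape: read the first operand, then consume ", operand" until end of statement, bailing out with eatToEndOfStatement on error. The same shape in miniature, over a plain string instead of the lexer:

    #include <cstdlib>
    #include <vector>

    // First operand, then ", operand" repeats until the separator stops
    // being a comma.
    std::vector<long> parseOperandList(const char *Text) {
      std::vector<long> Operands;
      char *End;
      Operands.push_back(std::strtol(Text, &End, 0));
      while (*End == ',')
        Operands.push_back(std::strtol(End + 1, &End, 0));
      return Operands;
    }
    // parseOperandList("1, 2, 0x30") yields { 1, 2, 48 }.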
if (ParseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -1876,7 +1876,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "expected comma before next operand"); } @@ -1906,7 +1906,7 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); @@ -1929,7 +1929,7 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { // ::= .tlsdesccall symbol bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { StringRef Name; - if (getParser().ParseIdentifier(Name)) + if (getParser().parseIdentifier(Name)) return Error(L, "expected symbol after directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8d6cf3c..c95cc1b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2568,7 +2568,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; - if (getParser().ParseExpression(ShiftExpr, EndLoc)) { + if (getParser().parseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; } @@ -2651,7 +2651,7 @@ tryParseRegisterWithWriteBack(SmallVectorImpl &Operands) { Parser.Lex(); // Eat left bracket token. 
const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) + if (getParser().parseExpression(ImmVal)) return true; const MCConstantExpr *MCE = dyn_cast(ImmVal); if (!MCE) @@ -2796,7 +2796,7 @@ parseCoprocOptionOperand(SmallVectorImpl &Operands) { const MCExpr *Expr; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(Expr)) { + if (getParser().parseExpression(Expr)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3009,7 +3009,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { const MCExpr *LaneIndex; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(LaneIndex)) { + if (getParser().parseExpression(LaneIndex)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3327,7 +3327,7 @@ parseMemBarrierOptOperand(SmallVectorImpl &Operands) { SMLoc Loc = Parser.getTok().getLoc(); const MCExpr *MemBarrierID; - if (getParser().ParseExpression(MemBarrierID)) { + if (getParser().parseExpression(MemBarrierID)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3543,7 +3543,7 @@ parsePKHImm(SmallVectorImpl &Operands, StringRef Op, const MCExpr *ShiftAmount; SMLoc Loc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3623,7 +3623,7 @@ parseShifterImm(SmallVectorImpl &Operands) { const MCExpr *ShiftAmount; SMLoc EndLoc; - if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(ExLoc, "malformed shift expression"); return MatchOperand_ParseFail; } @@ -3684,7 +3684,7 @@ parseRotImm(SmallVectorImpl &Operands) { const MCExpr *ShiftAmount; SMLoc EndLoc; - if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(ExLoc, "malformed rotate expression"); return MatchOperand_ParseFail; } @@ -3721,7 +3721,7 @@ parseBitfield(SmallVectorImpl &Operands) { const MCExpr *LSBExpr; SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(LSBExpr)) { + if (getParser().parseExpression(LSBExpr)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3754,7 +3754,7 @@ parseBitfield(SmallVectorImpl &Operands) { const MCExpr *WidthExpr; SMLoc EndLoc; - if (getParser().ParseExpression(WidthExpr, EndLoc)) { + if (getParser().parseExpression(WidthExpr, EndLoc)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3850,7 +3850,7 @@ parseAM3Offset(SmallVectorImpl &Operands) { bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *Offset; SMLoc E; - if (getParser().ParseExpression(Offset, E)) + if (getParser().parseExpression(Offset, E)) return MatchOperand_ParseFail; const MCConstantExpr *CE = dyn_cast(Offset); if (!CE) { @@ -4272,7 +4272,7 @@ parseMemory(SmallVectorImpl &Operands) { E = Parser.getTok().getLoc(); const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; // The expression has to be a constant. Memory references with relocations @@ -4328,7 +4328,7 @@ parseMemory(SmallVectorImpl &Operands) { bool isNegative = getParser().getTok().is(AsmToken::Minus); const MCExpr *Offset; - if (getParser().ParseExpression(Offset)) + if (getParser().parseExpression(Offset)) return true; // The expression has to be a constant. 
Memory references with relocations @@ -4447,7 +4447,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Parser.Lex(); // Eat hash token. const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; // Range check the immediate. // lsl, ror: 0 <= imm <= 31 @@ -4476,7 +4476,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl &Operands) { // Anything that can accept a floating point constant as an operand - // needs to go through here, as the regular ParseExpression is + // needs to go through here, as the regular parseExpression is // integer only. // // This routine still creates a generic Immediate operand, containing @@ -4596,7 +4596,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, // identifier (like labels) as expressions and create them as immediates. const MCExpr *IdVal; S = Parser.getTok().getLoc(); - if (getParser().ParseExpression(IdVal)) + if (getParser().parseExpression(IdVal)) return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(IdVal, S, E)); @@ -4615,7 +4615,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, if (Parser.getTok().isNot(AsmToken::Colon)) { bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) + if (getParser().parseExpression(ImmVal)) return true; const MCConstantExpr *CE = dyn_cast(ImmVal); if (CE) { @@ -4639,7 +4639,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, return true; const MCExpr *SubExprVal; - if (getParser().ParseExpression(SubExprVal)) + if (getParser().parseExpression(SubExprVal)) return true; const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal, @@ -5012,7 +5012,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // In Thumb1, only the branch (B) instruction can be predicated. if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "conditional execution not supported in Thumb1"); } @@ -5026,14 +5026,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (Mnemonic == "it") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2); if (ITMask.size() > 3) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "too many conditions on IT instruction"); } unsigned Mask = 8; for (unsigned i = ITMask.size(); i != 0; --i) { char pos = ITMask[i - 1]; if (pos != 't' && pos != 'e') { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "illegal IT block condition mask '" + ITMask + "'"); } Mask >>= 1; @@ -5059,14 +5059,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // If we had a carry-set on an instruction that can't do that, issue an // error. if (!CanAcceptCarrySet && CarrySetting) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' can not set flags, but 's' suffix specified"); } // If we had a predication code on an instruction that can't do that, issue an // error. 
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' is not predicable, but condition code specified"); } @@ -5115,7 +5115,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. if (parseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -5124,7 +5124,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Parse and remember the operand. if (parseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } } @@ -5132,7 +5132,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -7614,7 +7614,7 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; getParser().getStreamer().EmitValue(Value, Size); @@ -7761,13 +7761,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { unsigned Reg; SMLoc SRegLoc, ERegLoc; if (ParseRegister(Reg, SRegLoc, ERegLoc)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(SRegLoc, "register name expected"); } // Shouldn't be anything else. if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Parser.getTok().getLoc(), "unexpected input in .req directive."); } @@ -7785,7 +7785,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Identifier)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(L, "unexpected input in .unreq directive."); } RegisterReqs.erase(Parser.getTok().getIdentifier()); diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index d73c20f..ad495ff 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -451,7 +451,7 @@ MBlazeOperand *MBlazeAsmParser::ParseImmediate() { case AsmToken::Minus: case AsmToken::Integer: case AsmToken::Identifier: - if (getParser().ParseExpression(EVal)) + if (getParser().parseExpression(EVal)) return 0; return MBlazeOperand::CreateImm(EVal, S, E); @@ -537,7 +537,7 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; getParser().getStreamer().EmitValue(Value, Size); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 1f143d1..2fa514f 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -811,7 +811,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, } // maybe it is a symbol reference StringRef Identifier; - if 
(Parser.ParseIdentifier(Identifier)) + if (Parser.parseIdentifier(Identifier)) return true; SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); @@ -834,7 +834,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl&Operands, // quoted label names const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); - if (getParser().ParseExpression(IdVal)) + if (getParser().parseExpression(IdVal)) return true; SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); @@ -886,7 +886,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { } else break; } - if (getParser().ParseParenExpression(IdVal,EndLoc)) + if (getParser().parseParenExpression(IdVal,EndLoc)) return true; while (getLexer().getKind() == AsmToken::RParen) @@ -937,7 +937,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: - return (getParser().ParseExpression(Res)); + return (getParser().parseExpression(Res)); case AsmToken::Percent: return parseRelocOperand(Res); case AsmToken::LParen: @@ -1203,13 +1203,13 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, // Read the first operand. if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } if (getLexer().isNot(AsmToken::Comma)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1221,14 +1221,14 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, // Parse and remember the operand. if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1290,7 +1290,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // Read the first operand. if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1300,7 +1300,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // Parse and remember the operand. 
if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } @@ -1308,7 +1308,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1318,7 +1318,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, ErrorMsg); } @@ -1446,11 +1446,11 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetNoMacroDirective(); } else if (Tok.getString() == "nomips16") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } else if (Tok.getString() == "nomicromips") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } @@ -1463,7 +1463,7 @@ bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; getParser().getStreamer().EmitValue(Value, Size); @@ -1500,7 +1500,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".frame") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } @@ -1510,19 +1510,19 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".fmask") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } if (IDVal == ".mask") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } if (IDVal == ".gpword") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8c4c447..b2c6d55 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -906,7 +906,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().is(AsmToken::Identifier)) { if (ParseRegister(TmpReg, Start, End)) { const MCExpr *Disp; - if (getParser().ParseExpression(Disp, End)) + if (getParser().parseExpression(Disp, End)) return 0; if (getLexer().isNot(AsmToken::RBrac)) @@ -951,7 +951,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SM.onRegister(TmpReg); UpdateLocLex = false; break; - } else if (!getParser().ParseExpression(Disp, End)) { + } else if (!getParser().parseExpression(Disp, End)) { SM.onDispExpr(); UpdateLocLex = false; break; @@ -1033,7 +1033,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - if (getParser().ParseExpression(Disp, End)) + if (getParser().parseExpression(Disp, End)) return 0; bool NeedSizeDir = false; @@ -1135,7 +1135,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { SMLoc End; const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) 
+ if (getParser().parseExpression(Val, End)) return ErrorOperand(Start, "Unable to parse expression!"); // Don't emit the offset operator. @@ -1169,7 +1169,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { SMLoc End; const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return 0; unsigned Length = 0, Size = 0, Type = 0; @@ -1220,7 +1220,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; - if (!getParser().ParseExpression(Val, End)) { + if (!getParser().parseExpression(Val, End)) { return X86Operand::CreateImm(Val, Start, End); } } @@ -1271,7 +1271,7 @@ X86Operand *X86AsmParser::ParseATTOperand() { SMLoc Start = Parser.getTok().getLoc(), End; Parser.Lex(); const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return 0; return X86Operand::CreateImm(Val, Start, End); } @@ -1289,7 +1289,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; - if (getParser().ParseExpression(Disp, ExprEnd)) return 0; + if (getParser().parseExpression(Disp, ExprEnd)) return 0; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -1315,7 +1315,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc ExprEnd; // It must be an parenthesized expression, parse it now. - if (getParser().ParseParenExpression(Disp, ExprEnd)) + if (getParser().parseParenExpression(Disp, ExprEnd)) return 0; // After parsing the base expression we could either have a parenthesized @@ -1375,7 +1375,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t ScaleVal; - if (getParser().ParseAbsoluteExpression(ScaleVal)){ + if (getParser().parseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); return 0; } @@ -1394,7 +1394,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t Value; - if (getParser().ParseAbsoluteExpression(Value)) + if (getParser().parseAbsoluteExpression(Value)) return 0; if (Value != 1) @@ -1535,7 +1535,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); else { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -1546,14 +1546,14 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); else { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } } if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } @@ -2243,7 +2243,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; getParser().getStreamer().EmitValue(Value, Size); -- cgit v1.1 From 
3179b179467a07e601be5172213850786a985f74 Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Wed, 20 Feb 2013 22:33:46 +0000 Subject: Fix accidental concatenation for "outputuntil" in the -debug-buffer-size option description. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175682 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Debug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 0c0f15e..d9cb8a9 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -44,7 +44,7 @@ Debug("debug", cl::desc("Enable debug output"), cl::Hidden, //until program termination. static cl::opt DebugBufferSize("debug-buffer-size", - cl::desc("Buffer the last N characters of debug output" + cl::desc("Buffer the last N characters of debug output " "until program termination. " "[default 0 -- immediate print-out]"), cl::Hidden, -- cgit v1.1 From b7a1dda9c91b3d1821f4235c35a0d62c62d18848 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Feb 2013 23:04:11 +0000 Subject: Add and remove the attribute from the correct slot. The slot that we're adding/removing the attribute from may not be the same as the attribute coming in. Make sure that they match up before we try to add/remove them. PR15313 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175684 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Function.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 839e496..15c05e7 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -125,12 +125,22 @@ bool Argument::hasStructRetAttr() const { /// addAttr - Add attributes to an argument. void Argument::addAttr(AttributeSet AS) { - getParent()->addAttributes(getArgNo() + 1, AS); + assert(AS.getNumSlots() == 1 && + "Trying to add more than one attribute set to an argument!"); + AttrBuilder B(AS, AS.getSlotIndex(0)); + getParent()->addAttributes(getArgNo() + 1, + AttributeSet::get(Parent->getContext(), + getArgNo() + 1, B)); } /// removeAttr - Remove attributes from an argument. void Argument::removeAttr(AttributeSet AS) { - getParent()->removeAttributes(getArgNo() + 1, AS); + assert(AS.getNumSlots() == 1 && + "Trying to remove more than one attribute set from an argument!"); + AttrBuilder B(AS, AS.getSlotIndex(0)); + getParent()->removeAttributes(getArgNo() + 1, + AttributeSet::get(Parent->getContext(), + getArgNo() + 1, B)); } //===----------------------------------------------------------------------===// -- cgit v1.1 From beda6ab879e35b6f7d998da980b30e3844d3bbeb Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 20 Feb 2013 23:08:26 +0000 Subject: Copy single reaching defs directly into the LiveInterval. When findReachingDefs() finds that only one value can reach the basic block, just copy the work list of visited blocks directly into the live interval. Sort the block list and use a LiveRangeUpdater to make the bulk add fast. When multiple reaching defs are found, transfer the work list to the updateSSA() work list as before. Also use LiveRangeUpdater in updateLiveIns() following updateSSA(). This makes live interval analysis more than 3x faster on one huge test case. 
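In outline, the fast path gathers block numbers during the BFS, sorts them once, and streams contiguous ranges into a bulk updater rather than adding one range per block. A simplified stand-alone sketch of that pattern (BlockRange and RangeUpdater are invented stand-ins for the real SlotIndexes/LiveRangeUpdater machinery, not LLVM API):

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    // Invented stand-in for one block's [start, end) slot index range.
    typedef std::pair<unsigned, unsigned> BlockRange;

    // Invented stand-in for LiveRangeUpdater: because input arrives in
    // sorted order, coalescing adjacent ranges is a constant-time check.
    struct RangeUpdater {
      std::vector<BlockRange> Ranges;
      void add(unsigned Start, unsigned End) {
        if (!Ranges.empty() && Ranges.back().second == Start)
          Ranges.back().second = End;        // extend the previous range
        else
          Ranges.push_back(std::make_pair(Start, End));
      }
    };

    int main() {
      // Block numbers in BFS visitation order (unsorted).
      unsigned Visited[] = { 7, 2, 3, 6 };
      std::vector<unsigned> WorkList(Visited, Visited + 4);
      std::sort(WorkList.begin(), WorkList.end());

      RangeUpdater Updater;
      for (unsigned i = 0; i != WorkList.size(); ++i)
        Updater.add(WorkList[i] * 16, WorkList[i] * 16 + 16);

      for (unsigned i = 0; i != Updater.Ranges.size(); ++i)
        std::printf("[%u,%u)\n", Updater.Ranges[i].first,
                    Updater.Ranges[i].second);
      return 0;
    }

Sorted, the four blocks collapse into the two ranges [32,64) and [96,128); unsorted, the updater would have emitted four separate ranges.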
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175685 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveRangeCalc.cpp | 94 +++++++++++++++++++++++++++---------------- lib/CodeGen/LiveRangeCalc.h | 29 +++++++------ 2 files changed, 76 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index c3ff4f1..dede490 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -18,10 +18,11 @@ using namespace llvm; -void LiveRangeCalc::reset(const MachineFunction *MF, +void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, VNInfo::Allocator *VNIA) { + MF = mf; MRI = &MF->getRegInfo(); Indexes = SI; DomTree = MDT; @@ -104,28 +105,28 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Transfer information from the LiveIn vector to the live ranges. -void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) { +void LiveRangeCalc::updateLiveIns() { + LiveRangeUpdater Updater; for (SmallVectorImpl::iterator I = LiveIn.begin(), E = LiveIn.end(); I != E; ++I) { if (!I->DomNode) continue; MachineBasicBlock *MBB = I->DomNode->getBlock(); - - VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value; - assert(VNI && "No live-in value found"); - + assert(I->Value && "No live-in value found"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + // Value is killed inside this block. + End = I->Kill; else { - I->LI->addRange(LiveRange(Start, End, VNI)); - // The value is live-through, update LiveOut as well. Defer the Domtree - // lookup until it is needed. + // The value is live-through, update LiveOut as well. + // Defer the Domtree lookup until it is needed. assert(Seen.test(MBB->getNumber())); - LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } + Updater.setDest(I->LI); + Updater.add(Start, End, I->Value); } LiveIn.clear(); } @@ -150,13 +151,11 @@ void LiveRangeCalc::extend(LiveInterval *LI, // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg); + if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + return; // When there were multiple different values, we may need new PHIs. - if (!VNI) - updateSSA(); - - updateLiveIns(VNI); + calculateValues(); } @@ -167,16 +166,18 @@ void LiveRangeCalc::calculateValues() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); updateSSA(); - updateLiveIns(0); + updateLiveIns(); } -VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - // Blocks where LI should be live-in. - SmallVector WorkList(1, KillMBB); +bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg) { + unsigned KillMBBNum = KillMBB->getNumber(); + + // Block numbers where LI should be live-in. + SmallVector WorkList(1, KillMBBNum); // Remember if we have seen more than one value. bool UniqueVNI = true; @@ -184,7 +185,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // Using Seen as a visited set, perform a BFS for all reaching defs. 
for (unsigned i = 0; i != WorkList.size(); ++i) { - MachineBasicBlock *MBB = WorkList[i]; + MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); #ifndef NDEBUG if (MBB->pred_empty()) { @@ -231,25 +232,50 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // No, we need a live-in value for Pred as well if (Pred != KillMBB) - WorkList.push_back(Pred); + WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. Kill = SlotIndex(); } } - // Transfer WorkList to LiveInBlocks in reverse order. - // This ordering works best with updateSSA(). LiveIn.clear(); - LiveIn.reserve(WorkList.size()); - while(!WorkList.empty()) - addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val())); - // The kill block may not be live-through. - assert(LiveIn.back().DomNode->getBlock() == KillMBB); - LiveIn.back().Kill = Kill; + // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but + // neither require it. Skip the sorting overhead for small updates. + if (WorkList.size() > 4) + array_pod_sort(WorkList.begin(), WorkList.end()); + + // If a unique reaching def was found, blit in the live ranges immediately. + if (UniqueVNI) { + LiveRangeUpdater Updater(LI); + for (SmallVectorImpl::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(*I); + // Trim the live range in KillMBB. + if (*I == KillMBBNum && Kill.isValid()) + End = Kill; + else + LiveOut[MF->getBlockNumbered(*I)] = + LiveOutPair(TheVNI, (MachineDomTreeNode *)0); + Updater.add(Start, End, TheVNI); + } + return true; + } + + // Multiple values were found, so transfer the work list to the LiveIn array + // where UpdateSSA will use it as a work list. + LiveIn.reserve(WorkList.size()); + for (SmallVectorImpl::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + addLiveInBlock(LI, DomTree->getNode(MBB)); + if (MBB == KillMBB) + LiveIn.back().Kill = Kill; + } - return UniqueVNI ? TheVNI : 0; + return false; } diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 909829b..57cab7b 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -34,6 +34,7 @@ template class DomTreeNodeBase; typedef DomTreeNodeBase MachineDomTreeNode; class LiveRangeCalc { + const MachineFunction *MF; const MachineRegisterInfo *MRI; SlotIndexes *Indexes; MachineDominatorTree *DomTree; @@ -100,17 +101,20 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector LiveIn; - /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at - /// Kill, search for values that can reach KillMBB. All blocks that need LI - /// to be live-in are added to LiveIn. If a unique reaching def is found, - /// its value is returned, if Kill is jointly dominated by multiple values, - /// NULL is returned. + /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set + /// of defs that can reach it. + /// + /// If only one def can reach Kill, all paths from the def to kill are added + /// to LI, and the function returns true. + /// + /// If multiple values can reach Kill, the blocks that need LI to be live in + /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. 
- VNInfo *findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -119,12 +123,11 @@ class LiveRangeCalc { /// blocks. No values are read from the live ranges. void updateSSA(); - /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI - /// as a wildcard value for LiveIn entries without a value. - void updateLiveIns(VNInfo *VNI); + /// Add liveness as specified in the LiveIn vector. + void updateLiveIns(); public: - LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {} //===--------------------------------------------------------------------===// // High-level interface. -- cgit v1.1 From 99e98551bf8719764f9345ce856118f3f1a9c441 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 20 Feb 2013 23:11:17 +0000 Subject: ELF symbol table field st_other support, excluding visibility bits. Mips specific standalone assembler directive "set at". This directive changes the general purpose register that the assembler will use when given the symbolic register name $at. This does not include negative testing. That will come in a future patch. A side affect of this patch recognizes the different GPR register names for temporaries between old abi and new abi so a test case for that is included. Contributer: Vladimir Medic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175686 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 169 ++++++++++++++-------------- 1 file changed, 84 insertions(+), 85 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 2fa514f..088589f 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -147,6 +147,8 @@ class MipsAsmParser : public MCTargetAsmParser { int matchRegisterName(StringRef Symbol, bool is64BitReg); + int matchCPURegisterName(StringRef Symbol); + int matchRegisterByNumber(unsigned RegNum, unsigned RegClass); void setFpFormat(FpFormatTy Format) { @@ -163,7 +165,7 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getReg(int RC,int RegNo); - unsigned getATReg(); + int getATReg(); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { @@ -192,7 +194,7 @@ public: Kind_HW64Regs, Kind_FGR32Regs, Kind_FGR64Regs, - Kind_AFGR32Regs, + Kind_AFGR64Regs, Kind_CCRRegs }; @@ -574,84 +576,72 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } -int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { - +int MipsAsmParser::matchCPURegisterName(StringRef Name) { int CC; - if (!is64BitReg) + + if (Name == "at") + return getATReg(); + CC = StringSwitch(Name) - .Case("zero", Mips::ZERO) - .Case("a0", Mips::A0) - .Case("a1", Mips::A1) - .Case("a2", Mips::A2) - .Case("a3", Mips::A3) - .Case("v0", Mips::V0) - .Case("v1", Mips::V1) - .Case("s0", Mips::S0) - .Case("s1", Mips::S1) - .Case("s2", Mips::S2) - .Case("s3", Mips::S3) - .Case("s4", Mips::S4) - .Case("s5", Mips::S5) - .Case("s6", Mips::S6) - .Case("s7", Mips::S7) - .Case("k0", Mips::K0) - .Case("k1", Mips::K1) - .Case("sp", Mips::SP) - 
.Case("fp", Mips::FP) - .Case("gp", Mips::GP) - .Case("ra", Mips::RA) - .Case("t0", Mips::T0) - .Case("t1", Mips::T1) - .Case("t2", Mips::T2) - .Case("t3", Mips::T3) - .Case("t4", Mips::T4) - .Case("t5", Mips::T5) - .Case("t6", Mips::T6) - .Case("t7", Mips::T7) - .Case("t8", Mips::T8) - .Case("t9", Mips::T9) - .Case("at", Mips::AT) - .Case("fcc0", Mips::FCC0) - .Default(-1); - else + .Case("zero", 0) + .Case("a0", 4) + .Case("a1", 5) + .Case("a2", 6) + .Case("a3", 7) + .Case("v0", 2) + .Case("v1", 3) + .Case("s0", 16) + .Case("s1", 17) + .Case("s2", 18) + .Case("s3", 19) + .Case("s4", 20) + .Case("s5", 21) + .Case("s6", 22) + .Case("s7", 23) + .Case("k0", 26) + .Case("k1", 27) + .Case("sp", 29) + .Case("fp", 30) + .Case("gp", 28) + .Case("ra", 31) + .Case("t0", 8) + .Case("t1", 9) + .Case("t2", 10) + .Case("t3", 11) + .Case("t4", 12) + .Case("t5", 13) + .Case("t6", 14) + .Case("t7", 15) + .Case("t8", 24) + .Case("t9", 25) + .Default(-1); + + // Although SGI documentation just cut out t0-t3 for n32/n64, + // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7 + // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7. + if (isMips64() && 8 <= CC && CC <= 11) + CC += 4; + + if (CC == -1 && isMips64()) CC = StringSwitch(Name) - .Case("zero", Mips::ZERO_64) - .Case("at", Mips::AT_64) - .Case("v0", Mips::V0_64) - .Case("v1", Mips::V1_64) - .Case("a0", Mips::A0_64) - .Case("a1", Mips::A1_64) - .Case("a2", Mips::A2_64) - .Case("a3", Mips::A3_64) - .Case("a4", Mips::T0_64) - .Case("a5", Mips::T1_64) - .Case("a6", Mips::T2_64) - .Case("a7", Mips::T3_64) - .Case("t4", Mips::T4_64) - .Case("t5", Mips::T5_64) - .Case("t6", Mips::T6_64) - .Case("t7", Mips::T7_64) - .Case("s0", Mips::S0_64) - .Case("s1", Mips::S1_64) - .Case("s2", Mips::S2_64) - .Case("s3", Mips::S3_64) - .Case("s4", Mips::S4_64) - .Case("s5", Mips::S5_64) - .Case("s6", Mips::S6_64) - .Case("s7", Mips::S7_64) - .Case("t8", Mips::T8_64) - .Case("t9", Mips::T9_64) - .Case("kt0", Mips::K0_64) - .Case("kt1", Mips::K1_64) - .Case("gp", Mips::GP_64) - .Case("sp", Mips::SP_64) - .Case("fp", Mips::FP_64) - .Case("s8", Mips::FP_64) - .Case("ra", Mips::RA_64) + .Case("a4", 8) + .Case("a5", 9) + .Case("a6", 10) + .Case("a7", 11) + .Case("kt0", 26) + .Case("kt1", 27) + .Case("s8", 30) .Default(-1); + return CC; +} +int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { + + int CC; + CC = matchCPURegisterName(Name); if (CC != -1) - return CC; + return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID: + Mips::CPURegsRegClassID); if (Name[0] == 'f') { StringRef NumString = Name.substr(1); @@ -715,12 +705,8 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) { return true; } -unsigned MipsAsmParser::getATReg() { - unsigned Reg = Options.getATRegNum(); - if (isMips64()) - return getReg(Mips::CPU64RegsRegClassID,Reg); - - return getReg(Mips::CPURegsRegClassID,Reg); +int MipsAsmParser::getATReg() { + return Options.getATRegNum(); } unsigned MipsAsmParser::getReg(int RC,int RegNo) { @@ -1239,16 +1225,18 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { + StringRef Mnemonic; // floating point instructions: should register be treated as double? 
if (requestsDoubleOperand(Name)) { setFpFormat(FP_FORMAT_D); Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); + Mnemonic = Name; } else { setDefaultFpFormat(); // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); - StringRef Mnemonic = Name.slice(Start, Next); + Mnemonic = Name.slice(Start, Next); Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); @@ -1288,7 +1276,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // Read the remaining operands. if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - if (ParseOperand(Operands, Name)) { + if (ParseOperand(Operands, Mnemonic)) { SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); @@ -1341,6 +1329,7 @@ bool MipsAsmParser::parseSetAtDirective() { // line can be // .set at - defaults to $1 // or .set at=$reg + int AtRegNo; getParser().Lex(); if (getLexer().is(AsmToken::EndOfStatement)) { Options.setATReg(1); @@ -1353,12 +1342,22 @@ bool MipsAsmParser::parseSetAtDirective() { return false; } Parser.Lex(); // eat '$' - if (getLexer().isNot(AsmToken::Integer)) { + const AsmToken &Reg = Parser.getTok(); + if (Reg.is(AsmToken::Identifier)) { + AtRegNo = matchCPURegisterName(Reg.getIdentifier()); + } else if (Reg.is(AsmToken::Integer)) { + AtRegNo = Reg.getIntVal(); + } else { reportParseError("unexpected token in statement"); return false; } - const AsmToken &Reg = Parser.getTok(); - if (!Options.setATReg(Reg.getIntVal())) { + + if ( AtRegNo < 1 || AtRegNo > 31) { + reportParseError("unexpected token in statement"); + return false; + } + + if (!Options.setATReg(AtRegNo)) { reportParseError("unexpected token in statement"); return false; } -- cgit v1.1 From 063d49f767e971f5cc77205d7ee8f8be36d9b013 Mon Sep 17 00:00:00 2001 From: Pedro Artigas Date: Wed, 20 Feb 2013 23:30:56 +0000 Subject: as the allocator is reset zero out the number of bytes allocated, this was just missed before but probably what was intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175687 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Allocator.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index e269cf9..3c4191b 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -83,6 +83,7 @@ void BumpPtrAllocator::Reset() { CurSlab->NextPtr = 0; CurPtr = (char*)(CurSlab + 1); End = ((char*)CurSlab) + CurSlab->Size; + BytesAllocated = 0; } /// Allocate - Allocate space at the specified alignment. -- cgit v1.1 From 100c93c850911a33a4de59103beafd204d9fd150 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 20 Feb 2013 23:36:57 +0000 Subject: Kill of TransferDeadFlag - Dead copies and subreg-to-reg instructions should just be turned into kills on the spot. 
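Reduced to a toy (PseudoInstr below is an invented model, not MachineInstr): when the destination operand is already dead there is nothing to copy, so the pseudo is rewritten in place as a KILL marker and lowering stops, instead of emitting a real copy and then walking backwards to transfer the dead flag:

    #include <cstdio>
    #include <string>

    // Invented toy model of a pseudo-instruction being lowered.
    struct PseudoInstr {
      std::string Opcode;
      bool DstIsDead;
    };

    void lowerCopy(PseudoInstr &MI) {
      if (MI.DstIsDead) {
        MI.Opcode = "KILL";  // degrade to a kill marker on the spot
        return;
      }
      MI.Opcode = "COPY";    // otherwise emit the real register copy
    }

    int main() {
      PseudoInstr MI = { "COPY_PSEUDO", true };
      lowerCopy(MI);
      std::printf("%s\n", MI.Opcode.c_str());  // prints: KILL
      return 0;
    }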
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExpandPostRAPseudos.cpp | 38 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 0b9e83d..f71c453 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -49,8 +49,6 @@ private: bool LowerSubregToReg(MachineInstr *MI); bool LowerCopy(MachineInstr *MI); - void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, - const TargetRegisterInfo *TRI); void TransferImplicitDefs(MachineInstr *MI); }; } // end anonymous namespace @@ -61,21 +59,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; INITIALIZE_PASS(ExpandPostRA, "postrapseudos", "Post-RA pseudo instruction expansion pass", false, false) -/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, -/// and the lowered replacement instructions immediately precede it. -/// Mark the replacement instructions with the dead flag. -void -ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg, - const TargetRegisterInfo *TRI) { - for (MachineBasicBlock::iterator MII = - prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterDead(DstReg, TRI)) - break; - assert(MII != MI->getParent()->begin() && - "copyPhysReg output doesn't reference destination register!"); - } -} - /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered /// replacement instructions immediately precede it. Copy any implicit-def /// operands from MI to the replacement instruction. @@ -128,17 +111,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } DEBUG(dbgs() << "subreg: eliminated!"); } else { + if (MI->getOperand(0).isDead()) { + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "subreg: replaced by: " << *MI); + return true; + } TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); - // Implicitly define DstReg for subsequent uses. MachineBasicBlock::iterator CopyMI = MI; --CopyMI; CopyMI->addRegisterDefined(DstReg); - - // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) - TransferDeadFlag(MI, DstSubReg, TRI); DEBUG(dbgs() << "subreg: " << *CopyMI); } @@ -151,11 +134,18 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { MachineOperand &DstMO = MI->getOperand(0); MachineOperand &SrcMO = MI->getOperand(1); + if (DstMO.isDead()) { + DEBUG(dbgs() << "dead copy: " << *MI); + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); + return true; + } + if (SrcMO.getReg() == DstMO.getReg()) { DEBUG(dbgs() << "identity copy: " << *MI); // No need to insert an identity copy instruction, but replace with a KILL // if liveness is changed. - if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) { + if (SrcMO.isUndef() || MI->getNumOperands() > 2) { // We must make sure the super-register gets killed. Replace the // instruction with KILL. 
MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -171,8 +161,6 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); - if (DstMO.isDead()) - TransferDeadFlag(MI, DstMO.getReg(), TRI); if (MI->getNumOperands() > 2) TransferImplicitDefs(MI); DEBUG({ -- cgit v1.1 From 9bd7c3cc1306b6b2abc472d1e6ca2f7d0f3f3fbb Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 20 Feb 2013 23:51:10 +0000 Subject: Don't allocate memory in LiveInterval::join(). Rewrite value numbers directly in the 'Other' LiveInterval which is moribund anyway. This avoids allocating the OtherAssignments vector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175690 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveInterval.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 54f0da0..dccd847 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -464,10 +464,12 @@ void LiveInterval::join(LiveInterval &Other, ranges.erase(OutIt, end()); } - // Remember assignements because val# ids are changing. - SmallVector OtherAssignments; + // Rewrite Other values before changing the VNInfo ids. + // This can leave Other in an invalid state because we're not coalescing + // touching segments that now have identical values. That's OK since Other is + // not supposed to be valid after calling join(); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) - OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]); + I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this // LiveInterval now. Also remove dead val#'s. @@ -487,13 +489,8 @@ void LiveInterval::join(LiveInterval &Other, // Okay, now insert the RHS live ranges into the LHS. LiveRangeUpdater Updater(this); - unsigned RangeNo = 0; - for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) { - // Map the valno in the other live range to the current live range. - VNInfo *VNI = NewVNInfo[OtherAssignments[RangeNo]]; - assert(VNI && "Adding a dead range?"); - Updater.add(I->start, I->end, VNI); - } + for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) + Updater.add(*I); } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live -- cgit v1.1 From 4ffec108a927606c51ea6fe1f3a13863d50d1c48 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 20 Feb 2013 23:57:30 +0000 Subject: Formatting. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ConstantFolding.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index c6aff9d..c99925d 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1162,8 +1162,7 @@ llvm::canConstantFoldCallTo(const Function *F) { switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || - Name == "atan" || Name == "atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': -- cgit v1.1 From 06ab2c828a5605abec36eb0d6749940fa6eb7391 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 21 Feb 2013 00:05:29 +0000 Subject: Relocation enablement for PPC DAG postprocessing pass git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175693 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 16 +++++++++++++++- lib/Target/PowerPC/PPC.h | 22 ++++++++++++++-------- lib/Target/PowerPC/PPCInstrInfo.td | 4 ++-- lib/Target/PowerPC/PPCMCInstLower.cpp | 6 ++++++ 4 files changed, 37 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index d61e741..ad41f3a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -151,7 +151,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_TOC; break; case PPC::fixup_ppc_toc16: - Type = ELF::R_PPC64_TOC16; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_TOC16; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; + } break; case PPC::fixup_ppc_toc16_ds: switch (Modifier) { diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 72e0ce3..f71979f 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -57,26 +57,32 @@ namespace llvm { /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). - MO_PIC_FLAG = 4, + MO_PIC_FLAG = 2, /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase). - MO_NLP_FLAG = 8, + MO_NLP_FLAG = 4, /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a /// symbol with hidden visibility. This causes a different kind of /// non-lazy-pointer to be generated. - MO_NLP_HIDDEN_FLAG = 16, + MO_NLP_HIDDEN_FLAG = 8, /// The next are not flags but distinct values. - MO_ACCESS_MASK = 0xe0, + MO_ACCESS_MASK = 0xf0, /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 1 << 5, - MO_HA16 = 2 << 5, + MO_LO16 = 1 << 4, + MO_HA16 = 2 << 4, - MO_TPREL16_HA = 3 << 5, - MO_TPREL16_LO = 4 << 5 + MO_TPREL16_HA = 3 << 4, + MO_TPREL16_LO = 4 << 4, + + /// These values identify relocations on immediates folded + /// into memory operations. 
+ MO_DTPREL16_LO = 5 << 4, + MO_TLSLD16_LO = 6 << 4, + MO_TOC16_LO = 7 << 4 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8c077b7..8519bf1 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -346,7 +346,7 @@ def crbitm: Operand { // Address operands def memri : Operand { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand { @@ -355,7 +355,7 @@ def memrr : Operand { } def memrix : Operand { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 942c873..9b0df3e 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -115,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; break; + case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + break; + case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + break; + case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + break; } // FIXME: This isn't right, but we don't have a good way to express this in -- cgit v1.1 From 5c43245bf459c77077b607e1b55e6928cfbe464e Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 21 Feb 2013 00:27:28 +0000 Subject: Provide a "None" value for convenience when using Optional() This implementation of NoneType/None does have some holes but I haven't found one that doesn't - open to improvement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175696 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/LockFileManager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 31eec75..92d8b83 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -31,7 +31,7 @@ LockFileManager::readLockFile(StringRef LockFileName) { // to read, so we just return. bool Exists = false; if (sys::fs::exists(LockFileName, Exists) || !Exists) - return Optional >(); + return None; // Read the owning host and PID out of the lock file. If it appears that the // owning process is dead, the lock file is invalid. @@ -45,7 +45,7 @@ LockFileManager::readLockFile(StringRef LockFileName) { // Delete the lock file. It's invalid anyway. bool Existed; sys::fs::remove(LockFileName, Existed); - return Optional >(); + return None; } bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { -- cgit v1.1 From 421021157eda12453b4fea7ea853d8c472bd8532 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 21 Feb 2013 00:38:25 +0000 Subject: PPCDAGToDAGISel::PostprocessISelDAG() This patch implements the PPCDAGToDAGISel::PostprocessISelDAG virtual method to perform post-selection peephole optimizations on the DAG representation. One optimization is implemented here: folds to clean up complex addressing expressions for thread-local storage and medium code model. It will also be useful for large code model sequences when those are added later. 
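Stripped of the SelectionDAG details that follow, the fold has this shape (Node and foldZeroOffset are invented illustrations under stated assumptions, not the real DAG API):

    #include <cstdio>
    #include <cstring>

    // Invented illustration of the two nodes involved in the fold.
    struct Node {
      const char *Op;   // "LWZ", "ADDItocL", ...
      long Imm;         // immediate operand
      Node *Base;       // base-address operand, if any
    };

    // lwz r, 0(addi(base, imm))  -->  lwz r, imm(base)
    void foldZeroOffset(Node &MemOp) {
      Node *Add = MemOp.Base;
      if (MemOp.Imm != 0 || !Add || std::strncmp(Add->Op, "ADDI", 4) != 0)
        return;
      MemOp.Imm  = Add->Imm;   // hoist the displacement into the memory op
      MemOp.Base = Add->Base;  // re-point at the add-immediate's base
      // In the real pass, relocation flags ride along on the folded
      // immediate, and the addi is deleted once its last use is gone.
    }

    int main() {
      Node Toc  = { "TOCBASE",  0, 0 };
      Node Addi = { "ADDItocL", 8, &Toc };
      Node Load = { "LWZ",      0, &Addi };
      foldZeroOffset(Load);
      std::printf("%s %ld(%s)\n", Load.Op, Load.Imm, Load.Base->Op);
      return 0;
    }

Running the toy prints "LWZ 8(TOCBASE)", the folded form.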
I originally thought about doing this on the MI representation prior to register assignment, but it's difficult to do effective global dead code elimination at that point. DCE is trivial on the DAG representation. A typical example of a candidate code sequence in assembly: addis 3, 2, globalvar@toc@ha addi 3, 3, globalvar@toc@l lwz 5, 0(3) When the final instruction is a load or store with an immediate offset of zero, the offset from the add-immediate can replace the zero, provided the relocation information is carried along: addis 3, 2, globalvar@toc@ha lwz 5, globalvar@toc@l(3) Since the addi can in general have multiple uses, we need to only delete the instruction when the last use is removed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175697 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 155 +++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1453506..8663dd4 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -67,6 +67,8 @@ namespace { return true; } + virtual void PostprocessISelDAG(); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -1398,6 +1400,159 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +/// PostProcessISelDAG - Perform some late peephole optimizations +/// on the DAG representation. +void PPCDAGToDAGISel::PostprocessISelDAG() { + + // Skip peepholes at -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // These optimizations are currently supported only for 64-bit SVR4. + if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + return; + + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + unsigned FirstOp; + unsigned StorageOpcode = N->getMachineOpcode(); + + switch (StorageOpcode) { + default: continue; + + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LD: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWA: + case PPC::LWZ: + case PPC::LWZ8: + FirstOp = 0; + break; + + case PPC::STB: + case PPC::STB8: + case PPC::STD: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + FirstOp = 1; + break; + } + + // If this is a load or store with a zero offset, we may be able to + // fold an add-immediate into the memory operation. + if (!isa(N->getOperand(FirstOp)) || + N->getConstantOperandVal(FirstOp) != 0) + continue; + + SDValue Base = N->getOperand(FirstOp + 1); + if (!Base.isMachineOpcode()) + continue; + + unsigned Flags = 0; + bool ReplaceFlags = true; + + // When the feeding operation is an add-immediate of some sort, + // determine whether we need to add relocation information to the + // target flags on the immediate operand when we fold it into the + // load instruction. + // + // For something like ADDItocL, the relocation information is + // inferred from the opcode; when we process it in the AsmPrinter, + // we add the necessary relocation there. 
A load, though, can receive + // relocation from various flavors of ADDIxxx, so we need to carry + // the relocation information in the target flags. + switch (Base.getMachineOpcode()) { + default: continue; + + case PPC::ADDI8: + case PPC::ADDI8L: + case PPC::ADDIL: + // In some cases (such as TLS) the relocation information + // is already in place on the operand, so copying the operand + // is sufficient. + ReplaceFlags = false; + // For these cases, the immediate may not be divisible by 4, in + // which case the fold is illegal for DS-form instructions. (The + // other cases provide aligned addresses and are always safe.) + if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && + (!isa(Base.getOperand(1)) || + Base.getConstantOperandVal(1) % 4 != 0)) + continue; + break; + case PPC::ADDIdtprelL: + Flags = PPCII::MO_DTPREL16_LO; + break; + case PPC::ADDItlsldL: + Flags = PPCII::MO_TLSLD16_LO; + break; + case PPC::ADDItocL: + Flags = PPCII::MO_TOC16_LO; + break; + } + + // We found an opportunity. Reverse the operands from the add + // immediate and substitute them into the load or store. If + // needed, update the target flags for the immediate operand to + // reflect the necessary relocation information. + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + SDValue ImmOpnd = Base.getOperand(1); + + // If the relocation information isn't already present on the + // immediate operand, add it now. + if (ReplaceFlags) { + GlobalAddressSDNode *GA = dyn_cast(ImmOpnd); + + if (GA) { + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + } else { + ConstantPoolSDNode *CP = dyn_cast(ImmOpnd); + if (CP) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); + } + } + } + + if (FirstOp == 1) // Store + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); + else // Load + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} /// createPPCISelDag - This pass converts a legalized DAG into a -- cgit v1.1 From 77217229ba1bbc92f3a53099fa91bcdaa7797da8 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Thu, 21 Feb 2013 02:09:31 +0000 Subject: Mips specific standalone assembler addressing mode %hi and %lo. The constructs %hi() and %lo() represent the high and low 16 bits of the address. Because the 16 bit offset field of an LW instruction is interpreted as signed, if bit 15 of the low part is 1 then the low part will act as a negative and 1 needs to be added to the high part. 
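A worked example makes the compensation concrete (the address 0x12348765 is purely illustrative): the raw split gives hi = 0x1234 and lo = 0x8765, but the load sign-extends its 16-bit offset, so lo really contributes -0x789b; bumping hi to 0x1235 cancels that, since 0x12350000 - 0x789b == 0x12348765. The same arithmetic as a stand-alone check:

    #include <cassert>
    #include <stdint.h>

    int main() {
      uint32_t Val = 0x12348765;  // illustrative address, bit 15 of lo set
      uint32_t Lo  = Val & 0xffff;                              // 0x8765
      uint32_t Hi  = ((Val & 0xffff0000) >> 16)
                     + ((Val & 0x8000) ? 1 : 0);                // 0x1235
      int32_t SignedLo = (int16_t)Lo;  // what the load really adds: -0x789b
      assert((uint32_t)((Hi << 16) + SignedLo) == Val);
      return 0;
    }

This mirrors the Val & 0x8000 test and the Val++ increment in the patch below.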
Contributor: Vladimir Medic

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175707 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'lib')

diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 088589f..ade6084 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -888,7 +888,12 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
     if (Str == "lo") {
       Val = Val & 0xffff;
     } else if (Str == "hi") {
+      int LoSign = Val & 0x8000;
       Val = (Val & 0xffff0000) >> 16;
+      //lower part is treated as signed int, so if it is negative
+      //we must add 1 to hi part to compensate
+      if (LoSign)
+        Val++;
     }
     Res = MCConstantExpr::Create(Val, getContext());
     return false;
--
cgit v1.1


From ffbe432595c78ba28c8a9d200bf92996eed5e5d9 Mon Sep 17 00:00:00 2001
From: Reed Kotler
Date: Thu, 21 Feb 2013 04:22:38 +0000
Subject: Expand the sel pseudo/macro. This generates basic blocks where
 previously there were inline br .+4 instructions. Soon everything can enjoy
 the full instruction scheduling experience.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175718 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/Mips16InstrInfo.td   | 20 ++++++++++
 lib/Target/Mips/MipsISelLowering.cpp | 75 +++++++++++++++++++++++++++++++++++-
 lib/Target/Mips/MipsISelLowering.h   |  3 ++
 3 files changed, 97 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 1cb4a0e..9e07b95 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -76,6 +76,10 @@ class F2RI16_ins _op, string asmstr,
     let Constraints = "$rx_ = $rx";
 }
 
+class FRI16_B_ins _op, string asmstr,
+                  InstrItinClass itin>:
+  FRI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+        !strconcat(asmstr, "\t$rx, $imm # 16 bit inst"), [], itin>;
 //
 // Compare a register and immediate and place result in CC
 // Implicit use of T8
@@ -362,6 +366,7 @@ class Sel:
   !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> {
   //let isCodeGenOnly=1;
   let Constraints = "$rd = $rd_";
+  let usesCustomInserter = 1;
 }
 
 //
@@ -520,6 +525,14 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
 
 //
 // Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
 // Purpose: Branch on Equal to Zero (Extended)
 // To test a GPR then do a PC-relative conditional branch.
 //
@@ -533,6 +546,13 @@ def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
 
 //
 // Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
 // Purpose: Branch on Not Equal to Zero (Extended)
 // To test a GPR then do a PC-relative conditional branch.
// diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e0080e6..5605759 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -55,6 +55,12 @@ Mips16HardFloat("mips16-hard-float", cl::NotHidden, cl::desc("MIPS: mips16 hard float enable."), cl::init(false)); +static cl::opt DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); static const uint16_t O32IntRegs[4] = { @@ -1230,11 +1236,74 @@ MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ return Sink; } +MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} + MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: llvm_unreachable("Unexpected instr type to insert"); + default: + llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: case Mips::ATOMIC_LOAD_ADD_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); @@ -1340,6 +1409,10 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicCmpSwap(MI, BB, 8); case Mips::BPOSGE32_PSEUDO: return EmitBPOSGE32(MI, BB); + case Mips::SelBeqZ: + return EmitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return EmitSel16(Mips::BnezRxImm16, MI, BB); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 3b46355..2531a20 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -404,6 +404,9 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; + MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + }; } -- cgit v1.1 From 214df4285a974c61450477cbcf5c4a196d574a6a Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 21 Feb 2013 04:33:02 +0000 Subject: Update isKilledAt in TwoAddressInstructionPass.cpp to use LiveIntervals when available. With this commit there are no longer any assertion or verifier failures when running 'make check' without LiveVariables. There are still 56 failing tests with codegen differences and 1 unexpectedly passing test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 45d2a1b..d4108fb 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -347,11 +347,33 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, /// static bool isKilled(MachineInstr &MI, unsigned Reg, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { + const TargetInstrInfo *TII, + LiveIntervals *LIS) { MachineInstr *DefMI = &MI; for (;;) { - if (!DefMI->killsRegister(Reg)) + if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && + !LIS->isNotInMIMap(DefMI)) { + // FIXME: Sometimes tryInstructionTransform() will add instructions and + // test whether they can be folded before keeping them. In this case it + // sets a kill before recursively calling tryInstructionTransform() again. + // If there is no interval available, we assume that this instruction is + // one of those. A kill flag is manually inserted on the operand so the + // check below will handle it. + LiveInterval &LI = LIS->getInterval(Reg); + // This is to match the kill flag version where undefs don't have kill + // flags. 
+ if (!LI.hasAtLeastOneValue()) + return false; + + SlotIndex useIdx = LIS->getInstructionIndex(DefMI); + LiveInterval::const_iterator I = LI.find(useIdx); + assert(I != LI.end() && "Reg must be live-in to use."); + if (!SlotIndex::isSameInstr(I->end, useIdx)) + return false; + } else if (!DefMI->killsRegister(Reg)) { return false; + } + if (TargetRegisterInfo::isPhysicalRegister(Reg)) return true; MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); @@ -1000,7 +1022,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - bool regBKilled = isKilled(MI, regB, MRI, TII); + bool regBKilled = isKilled(MI, regB, MRI, TII, LIS); if (TargetRegisterInfo::isVirtualRegister(regA)) scanUses(regA); @@ -1020,7 +1042,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (regCIdx != ~0U) { regC = MI.getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(MI, regC, MRI, TII)) + if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; -- cgit v1.1 From 3a9805f26ead8746cb56645cb909a7b64d165b83 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 21 Feb 2013 07:02:28 +0000 Subject: Split part of isKilled() into a separate function for use elsewhere. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 50 +++++++++++++++++-------------- 1 file changed, 28 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index d4108fb..8d242ec 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -330,6 +330,33 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, return true; } +/// isPLainlyKilled - Test if the given register value, which is used by the +// given instruction, is killed by the given instruction. +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, + LiveIntervals *LIS) { + if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && + !LIS->isNotInMIMap(MI)) { + // FIXME: Sometimes tryInstructionTransform() will add instructions and + // test whether they can be folded before keeping them. In this case it + // sets a kill before recursively calling tryInstructionTransform() again. + // If there is no interval available, we assume that this instruction is + // one of those. A kill flag is manually inserted on the operand so the + // check below will handle it. + LiveInterval &LI = LIS->getInterval(Reg); + // This is to match the kill flag version where undefs don't have kill + // flags. + if (!LI.hasAtLeastOneValue()) + return false; + + SlotIndex useIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(useIdx); + assert(I != LI.end() && "Reg must be live-in to use."); + return SlotIndex::isSameInstr(I->end, useIdx); + } + + return MI->killsRegister(Reg); +} + /// isKilled - Test if the given register value, which is used by the given /// instruction, is killed by the given instruction. This looks through /// coalescable copies to see if the original value is potentially not killed. 
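A worked example of the SlotIndex test performed above (the indices are made up for illustration):

// Suppose the live interval of %vreg7 contains the segment [16r,64r)
// and the candidate use sits at instruction index 64:
//   useIdx = LIS->getInstructionIndex(MI);     // 64
//   I = LI.find(useIdx);                       // finds [16r,64r)
//   SlotIndex::isSameInstr(I->end, useIdx);    // both at 64 -> killed
// Had the segment been [16r,96r) instead, I->end would name a later
// instruction, the test would fail, and %vreg7 would live past the use.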
@@ -351,29 +378,8 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, LiveIntervals *LIS) { MachineInstr *DefMI = &MI; for (;;) { - if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && - !LIS->isNotInMIMap(DefMI)) { - // FIXME: Sometimes tryInstructionTransform() will add instructions and - // test whether they can be folded before keeping them. In this case it - // sets a kill before recursively calling tryInstructionTransform() again. - // If there is no interval available, we assume that this instruction is - // one of those. A kill flag is manually inserted on the operand so the - // check below will handle it. - LiveInterval &LI = LIS->getInterval(Reg); - // This is to match the kill flag version where undefs don't have kill - // flags. - if (!LI.hasAtLeastOneValue()) - return false; - - SlotIndex useIdx = LIS->getInstructionIndex(DefMI); - LiveInterval::const_iterator I = LI.find(useIdx); - assert(I != LI.end() && "Reg must be live-in to use."); - if (!SlotIndex::isSameInstr(I->end, useIdx)) - return false; - } else if (!DefMI->killsRegister(Reg)) { + if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; - } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) return true; MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); -- cgit v1.1 From 17cec5a68523fe346fb752b1661cc8e640dd520b Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 21 Feb 2013 07:02:30 +0000 Subject: Make another kill check LiveIntervals-aware. This brings the number of remaining failures in 'make check' without LiveVariables down to 39, with 1 unexpectedly passing test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175727 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 8d242ec..e0dba3f 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -499,7 +499,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // insert => %reg1030 = MOV8rr %reg1029 // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS - if (!MI->killsRegister(regC)) + if (!isPlainlyKilled(MI, regC, LIS)) return false; // Ok, we have something like: -- cgit v1.1 From a566d63b61f2a29e89696abba1729ac53b9843e6 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 21 Feb 2013 08:51:55 +0000 Subject: Use getInterval() instead of getOrCreateInterval(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175731 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 61122fc..b84d9d3 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -319,7 +319,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, IncomingVNI)); } - LiveInterval &DestLI = LIS->getOrCreateInterval(DestReg); + LiveInterval &DestLI = LIS->getInterval(DestReg); if (NewInstr->getOperand(0).isDead()) { // A dead PHI's live range begins and ends at the start of the MBB, but // the lowered copy, which will still be dead, needs to begin and end at -- cgit v1.1 From 197a60a66612ab274a734066962a10126a11fb53 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 21 Feb 2013 08:51:58 +0000 Subject: Don't rely on the isDead() MachineOperand flag when updating LiveIntervals. 
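Schematically, the fix moves the dead-def test from the operand flag to the interval itself (a hedged paraphrase of the hunk below, not additional code):

// Before: trust the dead flag on the lowered copy's def operand.
if (NewInstr->getOperand(0).isDead()) { /* shrink the range */ }

// After: ask LiveIntervals directly; a dead PHI def produces a live
// range whose end index is a dead slot, whether or not the flag is set.
LiveInterval &DestLI = LIS->getInterval(DestReg);
if (DestLI.endIndex().isDead()) { /* shrink the range */ }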
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175732 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PHIElimination.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b84d9d3..5584708 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -320,7 +320,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } LiveInterval &DestLI = LIS->getInterval(DestReg); - if (NewInstr->getOperand(0).isDead()) { + assert(DestLI.begin() != DestLI.end() && + "PHIs should have nonempty LiveIntervals."); + if (DestLI.endIndex().isDead()) { // A dead PHI's live range begins and ends at the start of the MBB, but // the lowered copy, which will still be dead, needs to begin and end at // the copy instruction. -- cgit v1.1 From 74bf7a8467262ad60c8b13582bd6b07fd30b5550 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Thu, 21 Feb 2013 08:57:10 +0000 Subject: R600/SI: Make sure M0 is loaded for V_INTERP_MOV_F32 NOTE: This is a candidate for the Mesa stable branch. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175733 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b4a263d..aef239c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1311,7 +1311,8 @@ def : Pat < def : Pat < (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params), - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, SReg_32:$params) + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, + (S_MOV_B32 SReg_32:$params)) >; def : Pat < -- cgit v1.1 From 0514595b9b20c9d807a3e31ba6bc270fb6c3f9e7 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 21 Feb 2013 14:35:42 +0000 Subject: Code review cleanup for r175697 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175739 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 8663dd4..561099b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1524,20 +1524,16 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // If the relocation information isn't already present on the // immediate operand, add it now. 
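// Note on the hunk below: it rewrites the separate dyn_cast-plus-null-
// check sequence into the usual LLVM idiom of declaring the cast result
// in the if-condition (the template arguments, e.g.
// dyn_cast<GlobalAddressSDNode>, were stripped from the rendered diff):
//   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
//     ...  // GA is known non-null in this scope
//   }
// This narrows the variable's scope and drops the explicit null test.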
if (ReplaceFlags) { - GlobalAddressSDNode *GA = dyn_cast(ImmOpnd); - - if (GA) { + if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { DebugLoc dl = GA->getDebugLoc(); const GlobalValue *GV = GA->getGlobal(); ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); - } else { - ConstantPoolSDNode *CP = dyn_cast(ImmOpnd); - if (CP) { - const Constant *C = CP->getConstVal(); - ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, - CP->getAlignment(), - 0, Flags); - } + } + else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); } } -- cgit v1.1 From 70a9ca9420b1428147da512721b4456f8836586e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 21 Feb 2013 15:06:59 +0000 Subject: R600: Fix for Unigine when MachineSched is enabled Fixes for-loop.cl piglit test Patch By: Vincent Lejeune Reviewed-by: Tom Stellard NOTE: This is a candidate for the Mesa stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175742 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index d24a363..8242df9 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1587,6 +1587,7 @@ def PRED_X : InstR600 < (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; + let isTerminator = 1; } let isTerminator = 1, isBranch = 1, isBarrier = 1 in { -- cgit v1.1 From 8c0b3a0d31f2eb04d96f63b72e189fe82f8b4a4f Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:16:44 +0000 Subject: R600/SI: cleanup SIInstrInfo.td and SIInstrFormat.td MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those two files got mixed up. Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrFormats.td | 500 ++++++++++++++++++++++++++---------- lib/Target/R600/SIInstrInfo.td | 515 ++++++++++++-------------------------- 2 files changed, 527 insertions(+), 488 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 40e37aa..fe417d6 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -1,4 +1,4 @@ -//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// +//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===// // // The LLVM Compiler Infrastructure // @@ -9,180 +9,418 @@ // // SI Instruction format definitions. // -// Instructions with _32 take 32-bit operands. -// Instructions with _64 take 64-bit operands. -// -// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit -// encoding is the standard encoding, but instruction that make use of -// any of the instruction modifiers must use the 64-bit encoding. -// -// Instructions with _e32 use the 32-bit encoding. -// Instructions with _e64 use the 64-bit encoding. 
-// //===----------------------------------------------------------------------===// -class VOP3_32 op, string opName, list pattern> - : VOP3 ; +class InstSI pattern> : + AMDGPUInst { + + field bits<1> VM_CNT = 0; + field bits<1> EXP_CNT = 0; + field bits<1> LGKM_CNT = 0; + + let TSFlags{0} = VM_CNT; + let TSFlags{1} = EXP_CNT; + let TSFlags{2} = LGKM_CNT; +} + +class Enc32 pattern> : + InstSI { + + field bits<32> Inst; + let Size = 4; +} -class VOP3_64 op, string opName, list pattern> - : VOP3 ; +class Enc64 pattern> : + InstSI { -class SOP1_32 op, string opName, list pattern> - : SOP1 ; + field bits<64> Inst; + let Size = 8; +} -class SOP1_64 op, string opName, list pattern> - : SOP1 ; +//===----------------------------------------------------------------------===// +// Scalar operations +//===----------------------------------------------------------------------===// -class SOP2_32 op, string opName, list pattern> - : SOP2 ; +class SOP1 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { -class SOP2_64 op, string opName, list pattern> - : SOP2 ; + bits<7> SDST; + bits<8> SSRC0; -class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> : - VOP1 < - op, (outs vrc:$dst), (ins arc:$src0), opName, pattern - >; + let Inst{7-0} = SSRC0; + let Inst{15-8} = op; + let Inst{22-16} = SDST; + let Inst{31-23} = 0x17d; //encoding; -multiclass VOP1_32 op, string opName, list pattern> { - def _e32: VOP1_Helper ; - def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -multiclass VOP1_64 op, string opName, list pattern> { +class SOP2 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<7> SDST; + bits<8> SSRC0; + bits<8> SSRC1; - def _e32 : VOP1_Helper ; + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = SDST; + let Inst{29-23} = op; + let Inst{31-30} = 0x2; // encoding - def _e64 : VOP3_64 < - {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -class VOP2_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> : - VOP2 < - op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern - >; +class SOPC op, dag outs, dag ins, string asm, list pattern> : + Enc32 { -multiclass VOP2_32 op, string opName, list pattern> { + bits<8> SSRC0; + bits<8> SSRC1; - def _e32 : VOP2_Helper ; + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = op; + let Inst{31-23} = 0x17e; - def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + let DisableEncoding = "$dst"; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -multiclass VOP2_64 op, string opName, list pattern> { - def _e32: VOP2_Helper ; +class SOPK op, dag outs, dag ins, string asm, list pattern> : + Enc32 { - def _e64 : VOP3_64 < - {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + bits <7> SDST; + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = SDST; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; //encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -class SOPK_32 op, string opName, list pattern> - : SOPK ; +class SOPP op, dag ins, string asm, list pattern> : Enc32 < + (outs), + ins, + asm, + pattern > { -class SOPK_64 op, string opName, list pattern> - : SOPK ; + bits <16> SIMM16; -multiclass VOPC_Helper op, RegisterClass vrc, 
RegisterClass arc, - string opName, list pattern> { + let Inst{15-0} = SIMM16; + let Inst{22-16} = op; + let Inst{31-23} = 0x17f; // encoding - def _e32 : VOPC ; - def _e64 : VOP3 < - {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - (outs SReg_64:$dst), - (ins arc:$src0, vrc:$src1, - InstFlag:$abs, InstFlag:$clamp, - InstFlag:$omod, InstFlag:$neg), - opName, pattern - > { - let SRC2 = 0x80; - } + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -multiclass VOPC_32 op, string opName, list pattern> - : VOPC_Helper ; - -multiclass VOPC_64 op, string opName, list pattern> - : VOPC_Helper ; +class SMRD op, bits<1> imm, dag outs, dag ins, string asm, + list pattern> : Enc32 { + + bits<7> SDST; + bits<6> SBASE; + bits<8> OFFSET; + + let Inst{7-0} = OFFSET; + let Inst{8} = imm; + let Inst{14-9} = SBASE; + let Inst{21-15} = SDST; + let Inst{26-22} = op; + let Inst{31-27} = 0x18; //encoding + + let LGKM_CNT = 1; +} -class SOPC_32 op, string opName, list pattern> - : SOPC ; +//===----------------------------------------------------------------------===// +// Vector ALU operations +//===----------------------------------------------------------------------===// + +let Uses = [EXEC] in { + +class VOP1 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> VDST; + bits<9> SRC0; + + let Inst{8-0} = SRC0; + let Inst{16-9} = op; + let Inst{24-17} = VDST; + let Inst{31-25} = 0x3f; //encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} -class SOPC_64 op, string opName, list pattern> - : SOPC ; +class VOP2 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> VDST; + bits<9> SRC0; + bits<8> VSRC1; + + let Inst{8-0} = SRC0; + let Inst{16-9} = VSRC1; + let Inst{24-17} = VDST; + let Inst{30-25} = op; + let Inst{31} = 0x0; //encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} -class MIMG_Load_Helper op, string asm> : MIMG < - op, - (outs VReg_128:$vdata), - (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, - GPR4Align:$srsrc, GPR4Align:$ssamp), - asm, - []> { - let mayLoad = 1; +class VOP3 op, dag outs, dag ins, string asm, list pattern> : + Enc64 { + + bits<8> VDST; + bits<9> SRC0; + bits<9> SRC1; + bits<9> SRC2; + bits<3> ABS; + bits<1> CLAMP; + bits<2> OMOD; + bits<3> NEG; + + let Inst{7-0} = VDST; + let Inst{10-8} = ABS; + let Inst{11} = CLAMP; + let Inst{25-17} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = SRC0; + let Inst{49-41} = SRC1; + let Inst{58-50} = SRC2; + let Inst{60-59} = OMOD; + let Inst{63-61} = NEG; + + let mayLoad = 0; let mayStore = 0; + let hasSideEffects = 0; } -class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < - op, - (outs), - (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm, - []> { - let mayStore = 1; +class VOP3b op, dag outs, dag ins, string asm, list pattern> : + Enc64 { + + bits<8> VDST; + bits<9> SRC0; + bits<9> SRC1; + bits<9> SRC2; + bits<7> SDST; + bits<2> OMOD; + bits<3> NEG; + + let Inst{7-0} = VDST; + let Inst{14-8} = SDST; + let Inst{25-17} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = SRC0; + let Inst{49-41} = SRC1; + let Inst{58-50} = SRC2; + let Inst{60-59} = OMOD; + let Inst{63-61} = NEG; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -class 
MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF < - op, - (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, - i1imm:$tfe, SSrc_32:$soffset), - asm, - []> { - let mayLoad = 1; +class VOPC op, dag ins, string asm, list pattern> : + Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { + + bits<9> SRC0; + bits<8> VSRC1; + + let Inst{8-0} = SRC0; + let Inst{16-9} = VSRC1; + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; + + let DisableEncoding = "$dst"; + let mayLoad = 0; let mayStore = 0; + let hasSideEffects = 0; } -class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < - op, - (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, - i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm, - []> { +class VINTRP op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> VDST; + bits<8> VSRC; + bits<2> ATTRCHAN; + bits<6> ATTR; + + let Inst{7-0} = VSRC; + let Inst{9-8} = ATTRCHAN; + let Inst{15-10} = ATTR; + let Inst{17-16} = op; + let Inst{25-18} = VDST; + let Inst{31-26} = 0x32; // encoding + + let neverHasSideEffects = 1; let mayLoad = 1; let mayStore = 0; } -multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { - def _IMM : SMRD < - op, 1, - (outs dstClass:$dst), - (ins GPR2Align:$sbase, i32imm:$offset), - asm, - [] - >; +} // End Uses = [EXEC] + +//===----------------------------------------------------------------------===// +// Vector I/O operations +//===----------------------------------------------------------------------===// - def _SGPR : SMRD < - op, 0, - (outs dstClass:$dst), - (ins GPR2Align:$sbase, SReg_32:$soff), - asm, - [] - >; +let Uses = [EXEC] in { + +class MUBUF op, dag outs, dag ins, string asm, list pattern> : + Enc64 { + + bits<8> VDATA; + bits<12> OFFSET; + bits<1> OFFEN; + bits<1> IDXEN; + bits<1> GLC; + bits<1> ADDR64; + bits<1> LDS; + bits<8> VADDR; + bits<5> SRSRC; + bits<1> SLC; + bits<1> TFE; + bits<8> SOFFSET; + + let Inst{11-0} = OFFSET; + let Inst{12} = OFFEN; + let Inst{13} = IDXEN; + let Inst{14} = GLC; + let Inst{15} = ADDR64; + let Inst{16} = LDS; + let Inst{24-18} = op; + let Inst{31-26} = 0x38; //encoding + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + + let VM_CNT = 1; + let EXP_CNT = 1; + + let neverHasSideEffects = 1; +} + +class MTBUF op, dag outs, dag ins, string asm, list pattern> : + Enc64 { + + bits<8> VDATA; + bits<12> OFFSET; + bits<1> OFFEN; + bits<1> IDXEN; + bits<1> GLC; + bits<1> ADDR64; + bits<4> DFMT; + bits<3> NFMT; + bits<8> VADDR; + bits<5> SRSRC; + bits<1> SLC; + bits<1> TFE; + bits<8> SOFFSET; + + let Inst{11-0} = OFFSET; + let Inst{12} = OFFEN; + let Inst{13} = IDXEN; + let Inst{14} = GLC; + let Inst{15} = ADDR64; + let Inst{18-16} = op; + let Inst{22-19} = DFMT; + let Inst{25-23} = NFMT; + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + + let VM_CNT = 1; + let EXP_CNT = 1; + + let neverHasSideEffects = 1; +} + +class MIMG op, dag outs, dag ins, string asm, list pattern> : + Enc64 { + + bits<8> VDATA; + bits<4> DMASK; + bits<1> UNORM; + bits<1> GLC; + bits<1> DA; + bits<1> R128; + bits<1> TFE; + bits<1> LWE; + bits<1> SLC; 
+ bits<8> VADDR; + bits<5> SRSRC; + bits<5> SSAMP; + + let Inst{11-8} = DMASK; + let Inst{12} = UNORM; + let Inst{13} = GLC; + let Inst{14} = DA; + let Inst{15} = R128; + let Inst{16} = TFE; + let Inst{17} = LWE; + let Inst{24-18} = op; + let Inst{25} = SLC; + let Inst{31-26} = 0x3c; + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{57-53} = SSAMP; + + let VM_CNT = 1; + let EXP_CNT = 1; +} + +def EXP : Enc64< + (outs), + (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, + VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), + "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", + [] > { + + bits<4> EN; + bits<6> TGT; + bits<1> COMPR; + bits<1> DONE; + bits<1> VM; + bits<8> VSRC0; + bits<8> VSRC1; + bits<8> VSRC2; + bits<8> VSRC3; + + let Inst{3-0} = EN; + let Inst{9-4} = TGT; + let Inst{10} = COMPR; + let Inst{11} = DONE; + let Inst{12} = VM; + let Inst{31-26} = 0x3e; + let Inst{39-32} = VSRC0; + let Inst{47-40} = VSRC1; + let Inst{55-48} = VSRC2; + let Inst{63-56} = VSRC3; + + let EXP_CNT = 1; } +} // End Uses = [EXEC] diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 8c4e5af..cf0d5b9 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// +//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===// // // The LLVM Compiler Infrastructure // @@ -44,31 +44,10 @@ class InlineImm : ImmLeaf ; -class InstSI pattern> : - AMDGPUInst { - field bits<1> VM_CNT = 0; - field bits<1> EXP_CNT = 0; - field bits<1> LGKM_CNT = 0; - - let TSFlags{0} = VM_CNT; - let TSFlags{1} = EXP_CNT; - let TSFlags{2} = LGKM_CNT; -} - -class Enc32 pattern> : - InstSI { - - field bits<32> Inst; - let Size = 4; -} - -class Enc64 pattern> : - InstSI { - - field bits<64> Inst; - let Size = 8; -} +//===----------------------------------------------------------------------===// +// SI assembler operands +//===----------------------------------------------------------------------===// class SIOperand : Operand { let EncoderMethod = "encodeOperand"; @@ -85,381 +64,203 @@ class GPR2Align : Operand { let MIOperandInfo = (ops rc:$reg); } -let Uses = [EXEC] in { - -def EXP : Enc64< - (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), - "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - bits<4> EN; - bits<6> TGT; - bits<1> COMPR; - bits<1> DONE; - bits<1> VM; - bits<8> VSRC0; - bits<8> VSRC1; - bits<8> VSRC2; - bits<8> VSRC3; - - let Inst{3-0} = EN; - let Inst{9-4} = TGT; - let Inst{10} = COMPR; - let Inst{11} = DONE; - let Inst{12} = VM; - let Inst{31-26} = 0x3e; - let Inst{39-32} = VSRC0; - let Inst{47-40} = VSRC1; - let Inst{55-48} = VSRC2; - let Inst{63-56} = VSRC3; - - let EXP_CNT = 1; -} - -class MIMG op, dag outs, dag ins, string asm, list pattern> : - Enc64 { - - bits<8> VDATA; - bits<4> DMASK; - bits<1> UNORM; - bits<1> GLC; - bits<1> DA; - bits<1> R128; - bits<1> TFE; - bits<1> LWE; - bits<1> SLC; - bits<8> VADDR; - bits<5> SRSRC; - bits<5> SSAMP; - - let Inst{11-8} = DMASK; - let Inst{12} = UNORM; - let Inst{13} = GLC; - let Inst{14} = DA; - let Inst{15} = R128; - let Inst{16} = TFE; - let Inst{17} = LWE; - let Inst{24-18} = op; - let Inst{25} = SLC; - let Inst{31-26} = 0x3c; - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = 
SRSRC; - let Inst{57-53} = SSAMP; - - let VM_CNT = 1; - let EXP_CNT = 1; -} - -class MTBUF op, dag outs, dag ins, string asm, list pattern> : - Enc64 { - - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<4> DFMT; - bits<3> NFMT; - bits<8> VADDR; - bits<5> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{18-16} = op; - let Inst{22-19} = DFMT; - let Inst{25-23} = NFMT; - let Inst{31-26} = 0x3a; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; - - let VM_CNT = 1; - let EXP_CNT = 1; - - let neverHasSideEffects = 1; -} +include "SIInstrFormats.td" -class MUBUF op, dag outs, dag ins, string asm, list pattern> : - Enc64 { - - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<1> LDS; - bits<8> VADDR; - bits<5> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{16} = LDS; - let Inst{24-18} = op; - let Inst{31-26} = 0x38; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; - - let VM_CNT = 1; - let EXP_CNT = 1; - - let neverHasSideEffects = 1; -} +//===----------------------------------------------------------------------===// +// +// SI Instruction multiclass helpers. +// +// Instructions with _32 take 32-bit operands. +// Instructions with _64 take 64-bit operands. +// +// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit +// encoding is the standard encoding, but instruction that make use of +// any of the instruction modifiers must use the 64-bit encoding. +// +// Instructions with _e32 use the 32-bit encoding. +// Instructions with _e64 use the 64-bit encoding. 
+// +//===----------------------------------------------------------------------===// -} // End Uses = [EXEC] +//===----------------------------------------------------------------------===// +// Scalar classes +//===----------------------------------------------------------------------===// -class SMRD op, bits<1> imm, dag outs, dag ins, string asm, - list pattern> : Enc32 { +class SOP1_32 op, string opName, list pattern> + : SOP1 ; - bits<7> SDST; - bits<6> SBASE; - bits<8> OFFSET; - - let Inst{7-0} = OFFSET; - let Inst{8} = imm; - let Inst{14-9} = SBASE; - let Inst{21-15} = SDST; - let Inst{26-22} = op; - let Inst{31-27} = 0x18; //encoding +class SOP1_64 op, string opName, list pattern> + : SOP1 ; - let LGKM_CNT = 1; -} +class SOP2_32 op, string opName, list pattern> + : SOP2 ; -class SOP1 op, dag outs, dag ins, string asm, list pattern> : - Enc32 { +class SOP2_64 op, string opName, list pattern> + : SOP2 ; - bits<7> SDST; - bits<8> SSRC0; +class SOPC_32 op, string opName, list pattern> + : SOPC ; - let Inst{7-0} = SSRC0; - let Inst{15-8} = op; - let Inst{22-16} = SDST; - let Inst{31-23} = 0x17d; //encoding; +class SOPC_64 op, string opName, list pattern> + : SOPC ; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} +class SOPK_32 op, string opName, list pattern> + : SOPK ; -class SOP2 op, dag outs, dag ins, string asm, list pattern> : - Enc32 { - - bits<7> SDST; - bits<8> SSRC0; - bits<8> SSRC1; +class SOPK_64 op, string opName, list pattern> + : SOPK ; - let Inst{7-0} = SSRC0; - let Inst{15-8} = SSRC1; - let Inst{22-16} = SDST; - let Inst{29-23} = op; - let Inst{31-30} = 0x2; // encoding +multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { + def _IMM : SMRD < + op, 1, (outs dstClass:$dst), + (ins GPR2Align:$sbase, i32imm:$offset), + asm, [] + >; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; + def _SGPR : SMRD < + op, 0, (outs dstClass:$dst), + (ins GPR2Align:$sbase, SReg_32:$soff), + asm, [] + >; } -class SOPC op, dag outs, dag ins, string asm, list pattern> : - Enc32 { +//===----------------------------------------------------------------------===// +// Vector ALU classes +//===----------------------------------------------------------------------===// - bits<8> SSRC0; - bits<8> SSRC1; +class VOP3_32 op, string opName, list pattern> : VOP3 < + op, (outs VReg_32:$dst), + (ins VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, + i32imm:$src4, i32imm:$src5, i32imm:$src6), + opName, pattern +>; - let Inst{7-0} = SSRC0; - let Inst{15-8} = SSRC1; - let Inst{22-16} = op; - let Inst{31-23} = 0x17e; +class VOP3_64 op, string opName, list pattern> : VOP3 < + op, (outs VReg_64:$dst), + (ins VSrc_64:$src0, VReg_64:$src1, VReg_64:$src2, + i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), + opName, pattern +>; - let DisableEncoding = "$dst"; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, + string opName, list pattern> : + VOP1 < + op, (outs vrc:$dst), (ins arc:$src0), opName, pattern + >; + +multiclass VOP1_32 op, string opName, list pattern> { + def _e32: VOP1_Helper ; + def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; } -class SOPK op, dag outs, dag ins, string asm, list pattern> : - Enc32 { +multiclass VOP1_64 op, string opName, list pattern> { - bits <7> SDST; - bits <16> SIMM16; - - let Inst{15-0} = SIMM16; - let Inst{22-16} = SDST; - let Inst{27-23} = op; - let Inst{31-28} = 0xb; //encoding + def _e32 : 
VOP1_Helper ; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; + def _e64 : VOP3_64 < + {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; } -class SOPP op, dag ins, string asm, list pattern> : Enc32 < - (outs), - ins, - asm, - pattern > { +class VOP2_Helper op, RegisterClass vrc, RegisterClass arc, + string opName, list pattern> : + VOP2 < + op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern + >; - bits <16> SIMM16; +multiclass VOP2_32 op, string opName, list pattern> { - let Inst{15-0} = SIMM16; - let Inst{22-16} = op; - let Inst{31-23} = 0x17f; // encoding + def _e32 : VOP2_Helper ; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; + def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; } - -let Uses = [EXEC] in { - -class VINTRP op, dag outs, dag ins, string asm, list pattern> : - Enc32 { - bits<8> VDST; - bits<8> VSRC; - bits<2> ATTRCHAN; - bits<6> ATTR; +multiclass VOP2_64 op, string opName, list pattern> { + def _e32: VOP2_Helper ; - let Inst{7-0} = VSRC; - let Inst{9-8} = ATTRCHAN; - let Inst{15-10} = ATTR; - let Inst{17-16} = op; - let Inst{25-18} = VDST; - let Inst{31-26} = 0x32; // encoding - - let neverHasSideEffects = 1; - let mayLoad = 1; - let mayStore = 0; + def _e64 : VOP3_64 < + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; } -class VOP1 op, dag outs, dag ins, string asm, list pattern> : - Enc32 { - - bits<8> VDST; - bits<9> SRC0; - - let Inst{8-0} = SRC0; - let Inst{16-9} = op; - let Inst{24-17} = VDST; - let Inst{31-25} = 0x3f; //encoding - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, + string opName, list pattern> { + + def _e32 : VOPC ; + def _e64 : VOP3 < + {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs SReg_64:$dst), + (ins arc:$src0, vrc:$src1, + InstFlag:$abs, InstFlag:$clamp, + InstFlag:$omod, InstFlag:$neg), + opName, pattern + > { + let SRC2 = 0x80; + } } -class VOP2 op, dag outs, dag ins, string asm, list pattern> : - Enc32 { - - bits<8> VDST; - bits<9> SRC0; - bits<8> VSRC1; - - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = VDST; - let Inst{30-25} = op; - let Inst{31} = 0x0; //encoding - +multiclass VOPC_32 op, string opName, list pattern> + : VOPC_Helper ; + +multiclass VOPC_64 op, string opName, list pattern> + : VOPC_Helper ; + +//===----------------------------------------------------------------------===// +// Vector I/O classes +//===----------------------------------------------------------------------===// + +class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < + op, + (outs), + (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, + i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, + GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), + asm, + []> { + let mayStore = 1; let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; } -class VOP3 op, dag outs, dag ins, string asm, list pattern> : - Enc64 { - - bits<8> VDST; - bits<9> SRC0; - bits<9> SRC1; - bits<9> SRC2; - bits<3> ABS; - bits<1> CLAMP; - bits<2> OMOD; - bits<3> NEG; - - let Inst{7-0} = VDST; - let Inst{10-8} = ABS; - let Inst{11} = CLAMP; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = SRC0; - let Inst{49-41} = SRC1; - let Inst{58-50} = SRC2; - let Inst{60-59} = OMOD; - let Inst{63-61} = NEG; - - let mayLoad = 0; +class 
MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF < + op, + (outs regClass:$dst), + (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, + i1imm:$tfe, SSrc_32:$soffset), + asm, + []> { + let mayLoad = 1; let mayStore = 0; - let hasSideEffects = 0; } -class VOP3b op, dag outs, dag ins, string asm, list pattern> : - Enc64 { - - bits<8> VDST; - bits<9> SRC0; - bits<9> SRC1; - bits<9> SRC2; - bits<7> SDST; - bits<2> OMOD; - bits<3> NEG; - - let Inst{7-0} = VDST; - let Inst{14-8} = SDST; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = SRC0; - let Inst{49-41} = SRC1; - let Inst{58-50} = SRC2; - let Inst{60-59} = OMOD; - let Inst{63-61} = NEG; - - let mayLoad = 0; +class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < + op, + (outs regClass:$dst), + (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, + i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), + asm, + []> { + let mayLoad = 1; let mayStore = 0; - let hasSideEffects = 0; } -class VOPC op, dag ins, string asm, list pattern> : - Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { - - bits<9> SRC0; - bits<8> VSRC1; - - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = op; - let Inst{31-25} = 0x3e; - - let DisableEncoding = "$dst"; - let mayLoad = 0; +class MIMG_Load_Helper op, string asm> : MIMG < + op, + (outs VReg_128:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, + GPR4Align:$srsrc, GPR4Align:$ssamp), + asm, + []> { + let mayLoad = 1; let mayStore = 0; - let hasSideEffects = 0; } -} // End Uses = [EXEC] - -include "SIInstrFormats.td" include "SIInstructions.td" -- cgit v1.1 From 7fa9957b16ee314b294da8abbec70bd2f1dfa608 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:16:49 +0000 Subject: R600/SI: add constant for inline zero operand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175747 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index cf0d5b9..8b90d45 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -49,9 +49,8 @@ class InlineImm : ImmLeaf : Operand { - let EncoderMethod = "encodeOperand"; - let MIOperandInfo = opInfo; +def SIOperand { + int ZERO = 0x80; } class GPR4Align : Operand { @@ -201,7 +200,7 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, InstFlag:$omod, InstFlag:$neg), opName, pattern > { - let SRC2 = 0x80; + let SRC2 = SIOperand.ZERO; } } -- cgit v1.1 From a38ccb4b32fca60264b734090a00cb850bcfbaf7 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:16:53 +0000 Subject: R600/SI: rework VOP1_* patterns v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing asm operation names. 
v2: use ZERO constant, also add asm operands Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175748 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 8b90d45..2b31307 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -141,29 +141,33 @@ class VOP3_64 op, string opName, list pattern> : VOP3 < opName, pattern >; -class VOP1_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> : - VOP1 < - op, (outs vrc:$dst), (ins arc:$src0), opName, pattern - >; +multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src, + string opName, list pattern> { -multiclass VOP1_32 op, string opName, list pattern> { - def _e32: VOP1_Helper ; - def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] + def _e32: VOP1 < + op, (outs drc:$dst), (ins src:$src0), + opName#"_e32 $dst, $src0", pattern >; -} -multiclass VOP1_64 op, string opName, list pattern> { - - def _e32 : VOP1_Helper ; - - def _e64 : VOP3_64 < + def _e64 : VOP3 < {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + (outs drc:$dst), + (ins src:$src0, + i32imm:$abs, i32imm:$clamp, + i32imm:$omod, i32imm:$neg), + opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", [] + > { + let SRC1 = SIOperand.ZERO; + let SRC2 = SIOperand.ZERO; + } } +multiclass VOP1_32 op, string opName, list pattern> + : VOP1_Helper ; + +multiclass VOP1_64 op, string opName, list pattern> + : VOP1_Helper ; + class VOP2_Helper op, RegisterClass vrc, RegisterClass arc, string opName, list pattern> : VOP2 < -- cgit v1.1 From 477963aff4f7fd93c3dfdb253c2983dc9f0450f9 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:16:58 +0000 Subject: R600/SI: rework VOP2_* pattern v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing asm operation names. 
v2: use ZERO constant, also add asm operands Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175749 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 --- lib/Target/R600/SIInstrInfo.td | 37 +++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4085890..5a468ae 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -75,7 +75,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(0x80) // SRC1 - .addImm(0x80) // SRC2 .addImm(0) // ABS .addImm(1) // CLAMP .addImm(0) // OMOD @@ -88,7 +87,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(0x80) // SRC1 - .addImm(0x80) // SRC2 .addImm(1) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD @@ -101,7 +99,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(0x80) // SRC1 - .addImm(0x80) // SRC2 .addImm(0) // ABS .addImm(0) // CLAMP .addImm(0) // OMOD diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 2b31307..dc18a71 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -168,29 +168,30 @@ multiclass VOP1_32 op, string opName, list pattern> multiclass VOP1_64 op, string opName, list pattern> : VOP1_Helper ; -class VOP2_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> : - VOP2 < - op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern +multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, + string opName, list pattern> { + def _e32 : VOP2 < + op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), + opName#"_e32 $dst, $src0, $src1", pattern >; -multiclass VOP2_32 op, string opName, list pattern> { - - def _e32 : VOP2_Helper ; - - def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + def _e64 : VOP3 < + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs vrc:$dst), + (ins arc:$src0, vrc:$src1, + i32imm:$abs, i32imm:$clamp, + i32imm:$omod, i32imm:$neg), + opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] + > { + let SRC2 = SIOperand.ZERO; + } } -multiclass VOP2_64 op, string opName, list pattern> { - def _e32: VOP2_Helper ; +multiclass VOP2_32 op, string opName, list pattern> + : VOP2_Helper ; - def _e64 : VOP3_64 < - {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; -} +multiclass VOP2_64 op, string opName, list pattern> + : VOP2_Helper ; multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, string opName, list pattern> { -- cgit v1.1 From 7b3dab2673128257b6bf9a3eaa4fe5aad9c9a675 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:04 +0000 Subject: R600/SI: simplify VOPC_* pattern v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing asm operation names. 
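For context on what these VOPC compares compute, a hedged, simplified model (it ignores EXEC masking of inactive lanes):

#include <cstdint>

// Model of a 64-lane VOPC compare such as V_CMP_LT_F32: each lane
// compares its own operands and the per-lane results form a 64-bit
// mask written to VCC (the _e64 form can target any SGPR pair).
uint64_t v_cmp_lt_f32(const float Src0[64], const float Src1[64]) {
  uint64_t VCC = 0;
  for (unsigned Lane = 0; Lane != 64; ++Lane)
    if (Src0[Lane] < Src1[Lane])
      VCC |= 1ull << Lane;
  return VCC;
}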
v2: fix name of the e64 encoding, also add asm operands Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175750 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 5 + lib/Target/R600/SIInstrInfo.td | 23 +- lib/Target/R600/SIInstructions.td | 444 +++++++++++++++------------------- 3 files changed, 217 insertions(+), 255 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 0559a5a..960f108 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -77,6 +77,11 @@ def COND_LE : PatLeaf < case ISD::SETLE: return true;}}}] >; +def COND_NULL : PatLeaf < + (cond), + [{return false;}] +>; + //===----------------------------------------------------------------------===// // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index dc18a71..0808f24 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -194,26 +194,35 @@ multiclass VOP2_64 op, string opName, list pattern> : VOP2_Helper ; multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> { + string opName, ValueType vt, PatLeaf cond> { + + def _e32 : VOPC < + op, (ins arc:$src0, vrc:$src1), + opName#"_e32 $dst, $src0, $src1", [] + >; - def _e32 : VOPC ; def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs SReg_64:$dst), (ins arc:$src0, vrc:$src1, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - opName, pattern + opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", + !if(!eq(!cast(cond), "COND_NULL"), [], + [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), vrc:$src1, cond)))] + ) > { let SRC2 = SIOperand.ZERO; } } -multiclass VOPC_32 op, string opName, list pattern> - : VOPC_Helper ; +multiclass VOPC_32 op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper ; -multiclass VOPC_64 op, string opName, list pattern> - : VOPC_Helper ; +multiclass VOPC_64 op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper ; //===----------------------------------------------------------------------===// // Vector I/O classes diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index aef239c..b7e44fe 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -127,286 +127,234 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; //def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; //def EXP : EXP_ <0x00000000, "EXP", []>; -defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; -defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>; -def : Pat < - (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_LT)), - (V_CMP_LT_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>; -def : Pat < - (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_EQ)), - (V_CMP_EQ_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>; -def : Pat < - (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_LE)), - (V_CMP_LE_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>; -def : Pat < 
- (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_GT)), - (V_CMP_GT_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>; -def : Pat < - (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_LG_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>; -def : Pat < - (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_GE)), - (V_CMP_GE_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; -defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>; -defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>; -defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>; -defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; -defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; -defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>; -def : Pat < - (i1 (setcc (f32 VSrc_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_NEQ_F32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; -defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; +defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">; +defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>; +defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>; +defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>; +defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>; +defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>; +defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>; +defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">; +defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">; +defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">; +defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">; +defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">; +defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">; +defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>; +defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; +defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; //Side effect is writing to EXEC let hasSideEffects = 1 in { -defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>; -defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>; -defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>; -defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>; -defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>; -defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>; -defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>; -defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>; -defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>; -defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>; -defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>; -defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>; -defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>; -defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>; -defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>; -defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>; +defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">; +defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">; +defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">; +defm 
V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">; +defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">; +defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">; +defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">; +defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">; +defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">; +defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">; +defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">; +defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">; +defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">; +defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">; +defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">; +defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; } // End hasSideEffects = 1 -defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>; -defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>; -defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>; -defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>; -defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>; -defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>; -defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>; -defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>; -defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>; -defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>; -defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>; -defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>; -defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>; -defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>; -defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>; -defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>; +defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; +defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">; +defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">; +defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">; +defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">; +defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">; +defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">; +defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">; +defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">; +defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">; +defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">; +defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">; +defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">; +defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">; +defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; +defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; //Side effect is writing to EXEC let hasSideEffects = 1 in { -defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>; -defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>; -defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>; -defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>; -defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>; -defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>; -defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>; -defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>; -defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>; -defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>; -defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, 
"V_CMPX_NLG_F64", []>; -defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>; -defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>; -defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>; -defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>; -defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>; +defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">; +defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">; +defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">; +defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">; +defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">; +defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">; +defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">; +defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">; +defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">; +defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">; +defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">; +defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">; +defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">; +defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">; +defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">; +defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">; } // End hasSideEffects = 1 -defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>; -defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>; -defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>; -defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>; -defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>; -defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>; -defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>; -defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>; -defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>; -defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>; -defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>; -defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>; -defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>; -defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>; -defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>; -defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>; -defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>; -defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>; -defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>; -defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>; -defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>; -defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>; -defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>; -defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>; -defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>; -defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>; -defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>; -defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>; -defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>; -defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>; -defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>; -defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>; -defm V_CMPS_F_F64 : VOPC_64 <0x00000060, 
"V_CMPS_F_F64", []>; -defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>; -defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>; -defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>; -defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>; -defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>; -defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>; -defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>; -defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>; -defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>; -defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>; -defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>; -defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>; -defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>; -defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>; -defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>; -defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>; -defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>; -defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>; -defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>; -defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>; -defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>; -defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>; -defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>; -defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>; -defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>; -defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>; -defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>; -defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>; -defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>; -defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>; -defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; -defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; -defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>; -def : Pat < - (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_LT)), - (V_CMP_LT_I32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>; -def : Pat < - (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_EQ)), - (V_CMP_EQ_I32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>; -def : Pat < - (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_LE)), - (V_CMP_LE_I32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>; -def : Pat < - (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_GT)), - (V_CMP_GT_I32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>; -def : Pat < - (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_NE_I32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>; -def : Pat < - (i1 (setcc (i32 VSrc_32:$src0), VReg_32:$src1, COND_GE)), - (V_CMP_GE_I32_e64 VSrc_32:$src0, VReg_32:$src1) ->; -defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; +defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">; +defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">; +defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">; +defm V_CMPS_LE_F32 : 
VOPC_32 <0x00000043, "V_CMPS_LE_F32">; +defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">; +defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">; +defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">; +defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">; +defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">; +defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">; +defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">; +defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">; +defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">; +defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">; +defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">; +defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">; +defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">; +defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">; +defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">; +defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">; +defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">; +defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">; +defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">; +defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">; +defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">; +defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">; +defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">; +defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">; +defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">; +defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">; +defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">; +defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">; +defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">; +defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">; +defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">; +defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">; +defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">; +defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">; +defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">; +defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">; +defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">; +defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">; +defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">; +defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">; +defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">; +defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">; +defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">; +defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">; +defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">; +defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">; +defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">; +defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">; +defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">; +defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">; +defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">; +defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">; +defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">; +defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">; +defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">; +defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">; +defm 
V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">; +defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">; +defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">; +defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">; +defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">; +defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>; +defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>; +defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>; +defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>; +defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; +defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>; +defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; let hasSideEffects = 1 in { -defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>; -defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>; -defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>; -defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>; -defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>; -defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>; -defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>; -defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>; +defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">; +defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">; +defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">; +defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">; +defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">; +defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">; +defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">; +defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; } // End hasSideEffects -defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>; -defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>; -defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>; -defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>; -defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>; -defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>; -defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>; -defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>; +defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; +defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">; +defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">; +defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">; +defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">; +defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">; +defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">; +defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; let hasSideEffects = 1 in { -defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>; -defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>; -defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>; -defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>; -defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>; -defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>; -defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>; -defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>; +defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">; +defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">; +defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">; 
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">; +defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">; +defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">; +defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">; +defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">; } // End hasSideEffects -defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>; -defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>; -defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>; -defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>; -defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>; -defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>; -defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>; -defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>; +defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; +defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">; +defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">; +defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">; +defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">; +defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">; +defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">; +defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; let hasSideEffects = 1 in { -defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>; -defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>; -defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>; -defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>; -defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>; -defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>; -defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>; -defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>; +defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">; +defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">; +defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">; +defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">; +defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">; +defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">; +defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">; +defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; } // End hasSideEffects -defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>; -defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>; -defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>; -defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>; -defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>; -defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>; -defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>; -defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>; -defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>; -defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>; -defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>; -defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>; -defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>; -defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>; -defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>; -defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>; -defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>; -defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>; -defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, 
"V_CMP_CLASS_F64", []>; -defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>; +defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; +defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">; +defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">; +defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">; +defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">; +defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">; +defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">; +defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; +defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">; +defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">; +defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">; +defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">; +defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">; +defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">; +defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">; +defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">; +defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">; +defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">; +defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">; +defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; -- cgit v1.1 From b4dc10c8c5df75c0b281e0d815018b5830b965b9 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:09 +0000 Subject: R600/SI: rework VOP3 classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Order the classes and add asm operands. 
Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175751 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 0808f24..05325db 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -127,20 +127,6 @@ multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { // Vector ALU classes //===----------------------------------------------------------------------===// -class VOP3_32 op, string opName, list pattern> : VOP3 < - op, (outs VReg_32:$dst), - (ins VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, - i32imm:$src4, i32imm:$src5, i32imm:$src6), - opName, pattern ->; - -class VOP3_64 op, string opName, list pattern> : VOP3 < - op, (outs VReg_64:$dst), - (ins VSrc_64:$src0, VReg_64:$src1, VReg_64:$src2, - i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), - opName, pattern ->; - multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src, string opName, list pattern> { @@ -224,6 +210,20 @@ multiclass VOPC_64 op, string opName, ValueType vt = untyped, PatLeaf cond = COND_NULL> : VOPC_Helper ; +class VOP3_32 op, string opName, list pattern> : VOP3 < + op, (outs VReg_32:$dst), + (ins VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, + i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern +>; + +class VOP3_64 op, string opName, list pattern> : VOP3 < + op, (outs VReg_64:$dst), + (ins VSrc_64:$src0, VReg_64:$src1, VReg_64:$src2, + i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern +>; + //===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// -- cgit v1.1 From f17d0d6f806e7e215c8fb17120ed18c22e957771 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:13 +0000 Subject: R600/SI: add the missing S_* asm operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175752 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 52 +++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 05325db..56ca03a 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -85,41 +85,57 @@ include "SIInstrFormats.td" // Scalar classes //===----------------------------------------------------------------------===// -class SOP1_32 op, string opName, list pattern> - : SOP1 ; +class SOP1_32 op, string opName, list pattern> : SOP1 < + op, (outs SReg_32:$dst), (ins SSrc_32:$src0), + opName#" $dst, $src0", pattern +>; -class SOP1_64 op, string opName, list pattern> - : SOP1 ; +class SOP1_64 op, string opName, list pattern> : SOP1 < + op, (outs SReg_64:$dst), (ins SSrc_64:$src0), + opName#" $dst, $src0", pattern +>; -class SOP2_32 op, string opName, list pattern> - : SOP2 ; +class SOP2_32 op, string opName, list pattern> : SOP2 < + 
op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), + opName#" $dst, $src0, $src1", pattern +>; -class SOP2_64 op, string opName, list pattern> - : SOP2 ; +class SOP2_64 op, string opName, list pattern> : SOP2 < + op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), + opName#" $dst, $src0, $src1", pattern +>; -class SOPC_32 op, string opName, list pattern> - : SOPC ; +class SOPC_32 op, string opName, list pattern> : SOPC < + op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), + opName#" $dst, $src0, $src1", pattern +>; -class SOPC_64 op, string opName, list pattern> - : SOPC ; +class SOPC_64 op, string opName, list pattern> : SOPC < + op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), + opName#" $dst, $src0, $src1", pattern +>; -class SOPK_32 op, string opName, list pattern> - : SOPK ; +class SOPK_32 op, string opName, list pattern> : SOPK < + op, (outs SReg_32:$dst), (ins i16imm:$src0), + opName#" $dst, $src0", pattern +>; -class SOPK_64 op, string opName, list pattern> - : SOPK ; +class SOPK_64 op, string opName, list pattern> : SOPK < + op, (outs SReg_64:$dst), (ins i16imm:$src0), + opName#" $dst, $src0", pattern +>; multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), (ins GPR2Align:$sbase, i32imm:$offset), - asm, [] + asm#" $dst, $sbase, $offset", [] >; def _SGPR : SMRD < op, 0, (outs dstClass:$dst), (ins GPR2Align:$sbase, SReg_32:$soff), - asm, [] + asm#" $dst, $sbase, $soff", [] >; } -- cgit v1.1 From ee44118ef7a917b2fd94f40e5a07d8b5f420acf2 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:17 +0000 Subject: R600/SI: add the missing M*BUF|IMG asm operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175753 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 56ca03a..99168ce 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -250,7 +250,8 @@ class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBU (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm, + asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," + #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayStore = 1; let mayLoad = 0; @@ -262,7 +263,8 @@ class MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm, + asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, " + #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayLoad = 1; let mayStore = 0; @@ -274,7 +276,8 @@ class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm, + asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," + #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayLoad 
= 1; let mayStore = 0; @@ -286,7 +289,8 @@ class MIMG_Load_Helper op, string asm> : MIMG < (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, GPR4Align:$srsrc, GPR4Align:$ssamp), - asm, + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", []> { let mayLoad = 1; let mayStore = 0; -- cgit v1.1 From 53f22df199542f6fc4e0edc5b7cecbeacea11ada Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:22 +0000 Subject: R600/SI: add all the other missing asm operands v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: put implicit parameters in [] Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175754 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 4 ++ lib/Target/R600/SIInstructions.td | 47 ++++++++++++----------- 2 files changed, 29 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index d6450a0..10547a5 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -11,6 +11,7 @@ #include "AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" using namespace llvm; @@ -35,6 +36,9 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << Op.getImm(); } else if (Op.isFPImm()) { O << Op.getFPImm(); + } else if (Op.isExpr()) { + const MCExpr *Exp = Op.getExpr(); + Exp->print(O); } else { assert(!"unknown operand type in printOperand"); } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b7e44fe..e9c00f9 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -620,7 +620,7 @@ def V_INTERP_P1_F32 : VINTRP < 0x00000000, (outs VReg_32:$dst), (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P1_F32", + "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]", []> { let DisableEncoding = "$m0"; } @@ -629,7 +629,7 @@ def V_INTERP_P2_F32 : VINTRP < 0x00000001, (outs VReg_32:$dst), (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P2_F32", + "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]", []> { let Constraints = "$src0 = $dst"; @@ -641,7 +641,7 @@ def V_INTERP_MOV_F32 : VINTRP < 0x00000002, (outs VReg_32:$dst), (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr", + "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]", []> { let DisableEncoding = "$m0"; } @@ -659,7 +659,7 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", let isBranch = 1 in { def S_BRANCH : SOPP < - 0x00000002, (ins brtarget:$target), "S_BRANCH", + 0x00000002, (ins brtarget:$target), "S_BRANCH $target", [(br bb:$target)]> { let isBarrier = 1; } @@ -667,35 +667,35 @@ def S_BRANCH : SOPP < let DisableEncoding = "$scc" in { def S_CBRANCH_SCC0 : SOPP < 0x00000004, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC0", [] + "S_CBRANCH_SCC0 $target", [] >; def S_CBRANCH_SCC1 : SOPP < 0x00000005, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC1", + "S_CBRANCH_SCC1 $target", [] >; } // End DisableEncoding = "$scc" def S_CBRANCH_VCCZ : SOPP < 0x00000006, 
(ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCZ", + "S_CBRANCH_VCCZ $target", [] >; def S_CBRANCH_VCCNZ : SOPP < 0x00000007, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCNZ", + "S_CBRANCH_VCCNZ $target", [] >; let DisableEncoding = "$exec" in { def S_CBRANCH_EXECZ : SOPP < 0x00000008, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECZ", + "S_CBRANCH_EXECZ $target", [] >; def S_CBRANCH_EXECNZ : SOPP < 0x00000009, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECNZ", + "S_CBRANCH_EXECNZ $target", [] >; } // End DisableEncoding = "$exec" @@ -722,16 +722,19 @@ def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), - (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32", + (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), + "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]", [] >{ let DisableEncoding = "$vcc"; } def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, SReg_64:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - "V_CNDMASK_B32_e64", - [(set (i32 VReg_32:$dst), (select (i1 SReg_64:$src2), VReg_32:$src1, VReg_32:$src0))] + (ins VReg_32:$src0, VReg_32:$src1, SReg_64:$src2, + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", + [(set (i32 VReg_32:$dst), (select (i1 SReg_64:$src2), + VReg_32:$src1, VReg_32:$src0))] >; //f32 pattern for V_CNDMASK_B32_e64 @@ -974,7 +977,7 @@ let isCodeGenOnly = 1, isPseudo = 1 in { def SET_M0 : InstSI < (outs SReg_32:$dst), (ins i32imm:$src0), - "SET_M0", + "SET_M0 $dst, $src0", [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))] >; @@ -1021,14 +1024,14 @@ let isBranch = 1, isTerminator = 1 in { def SI_IF : InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), - "SI_IF", + "SI_IF $dst, $vcc, $target", [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))] >; def SI_ELSE : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), - "SI_ELSE", + "SI_ELSE $dst, $src, $target", [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> { let Constraints = "$src = $dst"; @@ -1037,7 +1040,7 @@ def SI_ELSE : InstSI < def SI_LOOP : InstSI < (outs), (ins SReg_64:$saved, brtarget:$target), - "SI_LOOP", + "SI_LOOP $saved, $target", [(int_SI_loop SReg_64:$saved, bb:$target)] >; @@ -1046,28 +1049,28 @@ def SI_LOOP : InstSI < def SI_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src), - "SI_ELSE", + "SI_ELSE $dst, $src", [(set SReg_64:$dst, (int_SI_break SReg_64:$src))] >; def SI_IF_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src), - "SI_IF_BREAK", + "SI_IF_BREAK $dst, $vcc, $src", [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))] >; def SI_ELSE_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), - "SI_ELSE_BREAK", + "SI_ELSE_BREAK $dst, $src0, $src1", [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))] >; def SI_END_CF : InstSI < (outs), (ins SReg_64:$saved), - "SI_END_CF", + "SI_END_CF $saved", [(int_SI_end_cf SReg_64:$saved)] >; -- cgit v1.1 From 664a061f81228a8092895a695c8861d3025af5e8 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:27 +0000 Subject: R600/SI: use patterns for clamp, fabs, fneg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of 
using custom inserters, it's simpler and should make DAG folding easier. Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 36 ------------------------------------ lib/Target/R600/SIInstructions.td | 26 ++++++++++++++++++++++---- 2 files changed, 22 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 5a468ae..2f304eb 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -62,7 +62,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { - const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); MachineBasicBlock::iterator I = MI; @@ -70,41 +69,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::CLAMP_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(0x80) // SRC1 - .addImm(0) // ABS - .addImm(1) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG - MI->eraseFromParent(); - break; - - case AMDGPU::FABS_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(0x80) // SRC1 - .addImm(1) // ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG - MI->eraseFromParent(); - break; - - case AMDGPU::FNEG_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(0x80) // SRC1 - .addImm(0) // ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(1); // NEG - MI->eraseFromParent(); - break; case AMDGPU::SHADER_TYPE: BB->getParent()->getInfo()->ShaderType = MI->getOperand(0).getImm(); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e9c00f9..1e5f94f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1187,10 +1187,6 @@ defm : SamplePatterns; defm : SamplePatterns; defm : SamplePatterns; -def CLAMP_SI : CLAMP; -def FABS_SI : FABS; -def FNEG_SI : FNEG; - def : Extract_Element ; def : Extract_Element ; def : Extract_Element ; @@ -1214,6 +1210,28 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +/********** =================== **********/ +/********** Src & Dst modifiers **********/ +/********** =================== **********/ + +def : Pat < + (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)), + (V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */), + 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) +>; + +def : Pat < + (fabs VReg_32:$src), + (V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */), + 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) +>; + +def : Pat < + (fneg VReg_32:$src), + (V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */), + 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) +>; + /********** ================== **********/ /********** Immediate Patterns **********/ /********** ================== **********/ -- cgit v1.1 From 
1fbb3b3ce5629b22129a47c83bc84e2d783dde57 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:32 +0000 Subject: R600/SI: replace SI_V_CNDLT with a pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It actually fixes quite a few piglit tests. This is a candidate for the mesa-stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175756 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 22 ---------------------- lib/Target/R600/SIISelLowering.h | 2 -- lib/Target/R600/SIInstructions.td | 12 +++++------- 3 files changed, 5 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 2f304eb..212e3f2 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -81,9 +81,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::SI_WQM: LowerSI_WQM(MI, *BB, I, MRI); break; - case AMDGPU::SI_V_CNDLT: - LowerSI_V_CNDLT(MI, *BB, I, MRI); - break; } return BB; } @@ -127,25 +124,6 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MI->eraseFromParent(); } -void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - - BuildMI(BB, I, BB.findDebugLoc(I), - TII->get(AMDGPU::V_CMP_GT_F32_e32), - VCC) - .addImm(0) - .addOperand(MI->getOperand(1)); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(2)) - .addReg(VCC); - - MI->eraseFromParent(); -} - EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index a8429b7..5d048f8 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -29,8 +29,6 @@ class SITargetLowering : public AMDGPUTargetLowering { MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 1e5f94f..2a41285 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -990,13 +990,6 @@ def LOAD_CONST : AMDGPUShaderInst < let usesCustomInserter = 1 in { -def SI_V_CNDLT : InstSI < - (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), - "SI_V_CNDLT $dst, $src0, $src1, $src2", - [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))] ->; - def SI_INTERP : InstSI < (outs VReg_32:$dst), (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), @@ -1086,6 +1079,11 @@ def SI_KILL : InstSI < } // end IsCodeGenOnly, isPseudo +def : Pat< (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, 
VReg_32:$src0)) +>; + def : Pat < (int_AMDGPU_kilp), (SI_KILL (V_MOV_B32_e32 0xbf800000)) -- cgit v1.1 From 4fb98259b53df97f57ca45cfb90c11b22fae4e3a Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:36 +0000 Subject: R600/SI: replace IMPLICIT_DEF with SIOperand.ZERO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175757 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 2a41285..2ee82ca 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -895,7 +895,7 @@ def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; def : Pat < (mul VSrc_32:$src0, VReg_32:$src1), - (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (IMPLICIT_DEF), 0, 0, 0, 0) + (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 SIOperand.ZERO), 0, 0, 0, 0) >; def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -- cgit v1.1 From 14849048076c32c427948df569435f2d4e5dbfd2 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 21 Feb 2013 15:17:41 +0000 Subject: R600/SI: inline V_ADD|SUB_F32 patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175758 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 2ee82ca..55b4fa8 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -746,17 +746,13 @@ def : Pat < defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; -defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>; -def : Pat < - (f32 (fadd VSrc_32:$src0, VReg_32:$src1)), - (V_ADD_F32_e32 VSrc_32:$src0, VReg_32:$src1) +defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", + [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; - -defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>; -def : Pat < - (f32 (fsub VSrc_32:$src0, VReg_32:$src1)), - (V_SUB_F32_e32 VSrc_32:$src0, VReg_32:$src1) +defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", + [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; + defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; defm V_MUL_LEGACY_F32 : VOP2_32 < -- cgit v1.1 From f31bd0f7d92fdab64a478ccf6497e887d454d4af Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 21 Feb 2013 15:24:35 +0000 Subject: DAGCombiner: Make the post-legalize vector op optimization more aggressive. A legal BUILD_VECTOR goes in and gets constant folded into another legal BUILD_VECTOR so we don't lose any legality here. The problematic PPC optimization that made this check necessary was fixed recently. 
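Concretely, the fold in question turns, for example, (add (build_vector 1, 2, 3, 4), (build_vector 10, 20, 30, 40)) into (build_vector 11, 22, 33, 44); both the inputs and the result are constant BUILD_VECTORs of the same legal type. Below is a minimal self-contained sketch of that elementwise fold (plain C++ for illustration, deliberately not the SelectionDAG API; the name foldVectorAdd and the sample values are invented for the example):

#include <array>
#include <cstdio>

// Elementwise constant folding of two constant "build_vector" nodes,
// mirroring what DAGCombiner::SimplifyVBinOp does when both operands
// of a vector ADD are BUILD_VECTORs of constants: the result is again
// a constant vector of the same type, so no illegal node is created.
template <std::size_t N>
std::array<int, N> foldVectorAdd(const std::array<int, N> &A,
                                 const std::array<int, N> &B) {
  std::array<int, N> R{};
  for (std::size_t I = 0; I != N; ++I)
    R[I] = A[I] + B[I];
  return R;
}

int main() {
  const std::array<int, 4> A = {1, 2, 3, 4};
  const std::array<int, 4> B = {10, 20, 30, 40};
  for (int V : foldVectorAdd(A, B))
    std::printf("%d ", V); // prints: 11 22 33 44
  std::printf("\n");
  return 0;
}

Because the folded result has the same type as its inputs, re-running SimplifyVBinOp after legalization cannot introduce an illegal operation, which is why the early exit can be removed.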
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175759 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2777d7c..ec52d7e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9262,11 +9262,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { /// SimplifyVBinOp - Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { - // After legalize, the target may be depending on adds and other - // binary ops to provide legal ways to construct constants or other - // things. Simplifying them may result in a loss of legality. - if (LegalOperations) return SDValue(); - assert(N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!"); @@ -9336,11 +9331,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { /// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - // After legalize, the target may be depending on adds and other - // binary ops to provide legal ways to construct constants or other - // things. Simplifying them may result in a loss of legality. - if (LegalOperations) return SDValue(); - assert(N->getValueType(0).isVector() && "SimplifyVUnaryOp only works on vectors!"); -- cgit v1.1 From 8389f24a66674e5776399f744fa8def3b217bc9e Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 21 Feb 2013 16:40:18 +0000 Subject: getX86SubSuperRegister has a special mode with High=true for i64 which exists solely to enable it to call itself for i8 with some registers. The proposed patch simplifies the function somewhat to make the High bit only meaningful for the i8 mode, which makes sense. No functional difference (getX86SubSuperRegister is not getting called from anywhere outside with i64 and High=true). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86RegisterInfo.cpp | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 3b9144d..6b0a915 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -620,7 +620,15 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, case MVT::i8: if (High) { switch (Reg) { - default: return getX86SubSuperRegister(Reg, MVT::i64, High); + default: return getX86SubSuperRegister(Reg, MVT::i64); + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AH; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -740,22 +748,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R15D; } case MVT::i64: - // For 64-bit mode if we've requested a "high" register and the - // Q or r constraints we want one of these high registers or - // just the register name otherwise. 
- if (High) { - switch (Reg) { - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - // Fallthrough. - } - } switch (Reg) { default: llvm_unreachable("Unexpected register"); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: -- cgit v1.1 From f6c80bde655c1f08f7a42a55150727b0a7fad65d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 21 Feb 2013 17:01:59 +0000 Subject: Revert r175688 - It broke a test case (see PR15320). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175765 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExpandPostRAPseudos.cpp | 38 ++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index f71c453..0b9e83d 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -49,6 +49,8 @@ private: bool LowerSubregToReg(MachineInstr *MI); bool LowerCopy(MachineInstr *MI); + void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo *TRI); void TransferImplicitDefs(MachineInstr *MI); }; } // end anonymous namespace @@ -59,6 +61,21 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; INITIALIZE_PASS(ExpandPostRA, "postrapseudos", "Post-RA pseudo instruction expansion pass", false, false) +/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the dead flag. +void +ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::iterator MII = + prior(MachineBasicBlock::iterator(MI)); ; --MII) { + if (MII->addRegisterDead(DstReg, TRI)) + break; + assert(MII != MI->getParent()->begin() && + "copyPhysReg output doesn't reference destination register!"); + } +} + /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered /// replacement instructions immediately precede it. Copy any implicit-def /// operands from MI to the replacement instruction. @@ -111,17 +128,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } DEBUG(dbgs() << "subreg: eliminated!"); } else { - if (MI->getOperand(0).isDead()) { - MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "subreg: replaced by: " << *MI); - return true; - } TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + // Implicitly define DstReg for subsequent uses. MachineBasicBlock::iterator CopyMI = MI; --CopyMI; CopyMI->addRegisterDefined(DstReg); + + // Transfer the kill/dead flags, if needed. 
+ if (MI->getOperand(0).isDead()) + TransferDeadFlag(MI, DstSubReg, TRI); DEBUG(dbgs() << "subreg: " << *CopyMI); } @@ -134,18 +151,11 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { MachineOperand &DstMO = MI->getOperand(0); MachineOperand &SrcMO = MI->getOperand(1); - if (DstMO.isDead()) { - DEBUG(dbgs() << "dead copy: " << *MI); - MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "replaced by: " << *MI); - return true; - } - if (SrcMO.getReg() == DstMO.getReg()) { DEBUG(dbgs() << "identity copy: " << *MI); // No need to insert an identity copy instruction, but replace with a KILL // if liveness is changed. - if (SrcMO.isUndef() || MI->getNumOperands() > 2) { + if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) { // We must make sure the super-register gets killed. Replace the // instruction with KILL. MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -161,6 +171,8 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); + if (DstMO.isDead()) + TransferDeadFlag(MI, DstMO.getReg(), TRI); if (MI->getNumOperands() > 2) TransferImplicitDefs(MI); DEBUG({ -- cgit v1.1 From 53b0b0e75480121e4e01a7a76e17909e92b1762a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 21 Feb 2013 17:12:27 +0000 Subject: Large code model support for PowerPC. Large code model is identical to medium code model except that the addis/addi sequence for "local" accesses is never used. All accesses use the addis/ld sequence. The coding changes are straightforward; most of the patch is taken up with creating variants of the medium model tests for large model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 7 +++++-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 13 +++++++------ lib/Target/PowerPC/PPCISelLowering.h | 11 ++++++----- lib/Target/PowerPC/PPCInstr64Bit.td | 2 +- lib/Target/PowerPC/PPCInstrInfo.td | 2 +- 5 files changed, 20 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index a440667..eae9b7b 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -464,12 +464,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // associated TOC entry. Otherwise reference the symbol directly. TmpInst.setOpcode(PPC::LDrs); const MachineOperand &MO = MI->getOperand(1); - assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!"); + assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && + "Invalid operand for LDtocL!"); MCSymbol *MOSymbol = 0; if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else { + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast(GValue); const GlobalValue *RealGValue = GAlias ? diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 561099b..e3b879d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1281,16 +1281,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::TOC_ENTRY: { assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); - // For medium code model, we generate two instructions as described - // below. 
Otherwise we allow SelectCodeCommon to handle this, selecting - // one of LDtoc, LDtocJTI, and LDtocCPT. - if (TM.getCodeModel() != CodeModel::Medium) + // For medium and large code model, we generate two instructions as + // described below. Otherwise we allow SelectCodeCommon to handle this, + // selecting one of LDtoc, LDtocJTI, and LDtocCPT. + CodeModel::Model CModel = TM.getCodeModel(); + if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; // The first source operand is a TargetGlobalAddress or a // TargetJumpTable. If it is an externally defined symbol, a symbol // with common linkage, a function address, or a jump table address, - // we generate: + // or if we are generating code for large code model, we generate: // LDtocL(, ADDIStocHA(%X2, )) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, ), ) @@ -1299,7 +1300,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); - if (isa(GA)) + if (isa(GA) || CModel == CodeModel::Large) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 1fa88f3..f5d418c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -258,13 +258,14 @@ namespace llvm { /// or i32. LBRX, - /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces - /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha. + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, + /// produces an ADDIS8 instruction that adds the TOC base register to + /// sym@toc@ha. ADDIS_TOC_HA, - /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a - /// LD instruction with base register G8RReg and offset sym@toc@l. - /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, + /// produces a LD instruction with base register G8RReg and offset + /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 1dd5415..0120130 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -701,7 +701,7 @@ def : Pat<(PPCload ixaddr:$src), def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; -// Support for medium code model. +// Support for medium and large code model. 
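
An aside before the pseudo definitions resume below: the selection logic in the PPCISelDAGToDAG.cpp hunk above reduces to a small predicate. A minimal standalone sketch, using an illustrative enum rather than LLVM's CodeModel::Model and a flag standing in for the isa<> checks on the address node:

    enum class CodeModel { Small, Medium, Large };

    // True when the addis/ld (TOC-load) sequence must be used instead of the
    // addis/addi "local" sequence: always under the large code model, and for
    // symbols that need a TOC entry under the medium model.
    bool useTocLoad(bool NeedsTocEntry, CodeModel CM) {
      return NeedsTocEntry || CM == CodeModel::Large;
    }

This matches the commit message: the large model is the medium model with the addis/addi local sequence never used.
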
def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), "#ADDIStocHA", [(set G8RC:$rD, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8519bf1..460e943 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -181,7 +181,7 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, [SDNPHasChain, SDNPMayStore]>; -// Instructions to support medium code model +// Instructions to support medium and large code model def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>; def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>; def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>; -- cgit v1.1 From 399eafb580f824d6df2d7392e1bc3e25ecb39f32 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 21 Feb 2013 17:26:05 +0000 Subject: Trivial cleanup git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175771 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index e3b879d..17bea8a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1529,8 +1529,8 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { DebugLoc dl = GA->getDebugLoc(); const GlobalValue *GV = GA->getGlobal(); ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); - } - else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { + } else if (ConstantPoolSDNode *CP = + dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment(), -- cgit v1.1 From b300455b5817f099d64aad8f9356e0e23fa9a87e Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 21 Feb 2013 18:37:54 +0000 Subject: Radar numbers don't belong in source code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175775 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 -- lib/Target/ARM/ARMConstantIslandPass.cpp | 2 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0076910..ed001ea 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2719,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; if (Subtarget.isSwift()) { - // rdar://8402126 int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. switch (Opc) { default: break; @@ -4047,7 +4046,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI, case ARM::VLDRS: case ARM::FCONSTS: case ARM::VMOVSR: - // rdar://problem/8791586 case ARM::VMOVv8i8: case ARM::VMOVv4i16: case ARM::VMOVv2i32: diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 70a25c2..4891609 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1468,7 +1468,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { if (CPEBB->empty()) { BBInfo[CPEBB->getNumber()].Size = 0; - // This block no longer needs to be aligned. . + // This block no longer needs to be aligned. 
CPEBB->setAlignment(0); } else // Entries are sorted by descending alignment, so realign from the front. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a1c21ee..98bd6c1 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1188,7 +1188,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddDeadKill = true; } // Never kill the base register in the first instruction. - // if (EvenReg == BaseReg) EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, -- cgit v1.1 From 601158a18e325879b224bd1979d824407ed98bc7 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 21 Feb 2013 19:35:21 +0000 Subject: Make RAFast::UsedInInstr indexed by register units. This fixes some problems with too conservative checking where we were marking all aliases of a register as used, and then also checking all aliases when allocating a register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175782 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegAllocFast.cpp | 52 +++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 840ddb1..bb9c05c 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -113,12 +113,27 @@ namespace { // PhysRegState - One of the RegState enums, or a virtreg. std::vector PhysRegState; + // Set of register units. typedef SparseSet UsedInInstrSet; - // UsedInInstr - Set of physregs that are used in the current instruction, - // and so cannot be allocated. + // Set of register units that are used in the current instruction, and so + // cannot be allocated. UsedInInstrSet UsedInInstr; + // Mark a physreg as used in this instruction. + void markRegUsedInInstr(unsigned PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.insert(*Units); + } + + // Check if a physreg or any of its aliases are used in this instruction. + bool isRegUsedInInstr(unsigned PhysReg) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (UsedInInstr.count(*Units)) + return true; + return false; + } + // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to // mark all the clobbered registers as used by the function. @@ -333,7 +348,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { unsigned PhysReg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); - + markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { case regDisabled: break; @@ -341,7 +356,6 @@ void RAFast::usePhysReg(MachineOperand &MO) { PhysRegState[PhysReg] = regFree; // Fall through case regFree: - UsedInInstr.insert(PhysReg); MO.setIsKill(); return; default: @@ -361,13 +375,11 @@ void RAFast::usePhysReg(MachineOperand &MO) { "Instruction is not using a subregister of a reserved register"); // Leave the superregister in the working set. PhysRegState[Alias] = regFree; - UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. 
- UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -381,7 +393,6 @@ void RAFast::usePhysReg(MachineOperand &MO) { // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; - UsedInInstr.insert(PhysReg); MO.setIsKill(); } @@ -390,7 +401,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// reserved instead of allocated. void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { - UsedInInstr.insert(PhysReg); + markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -430,7 +441,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.count(PhysReg)) { + if (isRegUsedInInstr(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } @@ -455,8 +466,6 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; - if (UsedInInstr.count(Alias)) - return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -531,7 +540,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // First try to find a completely free register. for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) { + if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } @@ -597,7 +606,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; - UsedInInstr.insert(LRI->PhysReg); + markRegUsedInInstr(LRI->PhysReg); return LRI; } @@ -647,7 +656,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; - UsedInInstr.insert(LRI->PhysReg); + markRegUsedInInstr(LRI->PhysReg); return LRI; } @@ -708,8 +717,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - UsedInInstr.insert(*AI); if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } @@ -765,12 +774,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); - UsedInInstr.insert(Reg); + markRegUsedInInstr(Reg); } // Also mark PartialDefs as used to avoid reallocation. for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - UsedInInstr.insert(PartialDefs[i]); + markRegUsedInInstr(PartialDefs[i]); } void RAFast::AllocateBasicBlock() { @@ -969,7 +978,7 @@ void RAFast::AllocateBasicBlock() { for (UsedInInstrSet::iterator I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setPhysRegUsed(*I); + MRI->setRegUnitUsed(*I); // Track registers defined by instruction - early clobbers and tied uses at // this point. 
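
The register-unit scheme that markRegUsedInInstr and isRegUsedInInstr rely on can be modeled in isolation. A toy sketch, not LLVM's MCRegUnitIterator API: every physical register covers a set of register units, and two registers alias exactly when their unit sets intersect.

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using UnitList = std::vector<unsigned>;

    // Do two registers share a register unit, i.e. alias?
    bool unitsIntersect(const UnitList &A, const UnitList &B) {
      for (unsigned U : A)
        if (std::find(B.begin(), B.end(), U) != B.end())
          return true;
      return false;
    }

    int main() {
      // x86-style example: AX covers the units of both AL and AH.
      const UnitList AL = {0}, AH = {1}, AX = {0, 1};
      assert(unitsIntersect(AX, AL) && unitsIntersect(AX, AH));
      assert(!unitsIntersect(AL, AH)); // the two halves are independent
      return 0;
    }

Under the old physreg-based marking, using AL marked all of its aliases (AX, EAX, RAX) as used, so a later check of AH found AX marked and refused the allocation; with per-unit marking, AL only occupies unit 0 and AH stays allocatable, which is exactly the over-conservatism the commit message describes removing.
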
@@ -982,8 +991,7 @@ void RAFast::AllocateBasicBlock() { if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedInInstr.insert(*AI); + markRegUsedInInstr(Reg); } } @@ -1035,7 +1043,7 @@ void RAFast::AllocateBasicBlock() { for (UsedInInstrSet::iterator I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setPhysRegUsed(*I); + MRI->setRegUnitUsed(*I); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); @@ -1071,7 +1079,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegs()); + UsedInInstr.setUniverse(TRI->getNumRegUnits()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); -- cgit v1.1 From 6585d3b64b04b788faad79ba6cf4e592ebf4658d Mon Sep 17 00:00:00 2001 From: Anshuman Dasgupta Date: Thu, 21 Feb 2013 19:39:40 +0000 Subject: Hexagon: Expand cttz, ctlz, and ctpop for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175783 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 99a5db3..fac931a 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1366,10 +1366,15 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i64, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ , MVT::i32, Expand); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); -- cgit v1.1 From fece442c697eb29f14b4718bdafa3a97d545b476 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 21 Feb 2013 19:46:51 +0000 Subject: Don't assert on empty attributes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175785 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Function.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 15c05e7..5c444d2 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -125,7 +125,7 @@ bool Argument::hasStructRetAttr() const { /// addAttr - Add attributes to an argument. void Argument::addAttr(AttributeSet AS) { - assert(AS.getNumSlots() == 1 && + assert(AS.getNumSlots() <= 1 && "Trying to add more than one attribute set to an argument!"); AttrBuilder B(AS, AS.getSlotIndex(0)); getParent()->addAttributes(getArgNo() + 1, @@ -135,7 +135,7 @@ void Argument::addAttr(AttributeSet AS) { /// removeAttr - Remove attributes from an argument. 
void Argument::removeAttr(AttributeSet AS) { - assert(AS.getNumSlots() == 1 && + assert(AS.getNumSlots() <= 1 && "Trying to remove more than one attribute set from an argument!"); AttrBuilder B(AS, AS.getSlotIndex(0)); getParent()->removeAttributes(getArgNo() + 1, -- cgit v1.1 From 700ed80d3da5e98e05ceb90e9bfb66058581a6db Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 21 Feb 2013 20:05:00 +0000 Subject: Move the eliminateCallFramePseudoInstr method from TargetRegisterInfo to TargetFrameLowering, where it belongs. Incidentally, this allows us to delete some duplicated (and slightly different!) code in TRI. There are potentially other layering problems that can be cleaned up as a result, or in a similar manner. The refactoring was OK'd by Anton Korobeynikov on llvmdev. Note: this touches the target interfaces, so out-of-tree targets may be affected. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175788 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PrologEpilogInserter.cpp | 5 +- lib/Target/AArch64/AArch64FrameLowering.cpp | 40 +++++++++++ lib/Target/AArch64/AArch64FrameLowering.h | 4 ++ lib/Target/AArch64/AArch64RegisterInfo.cpp | 38 ----------- lib/Target/AArch64/AArch64RegisterInfo.h | 4 -- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 58 ---------------- lib/Target/ARM/ARMBaseRegisterInfo.h | 4 -- lib/Target/ARM/ARMFrameLowering.cpp | 55 ++++++++++++++- lib/Target/ARM/ARMFrameLowering.h | 5 ++ lib/Target/ARM/Thumb1FrameLowering.cpp | 35 ++++++++++ lib/Target/ARM/Thumb1FrameLowering.h | 4 ++ lib/Target/ARM/Thumb1RegisterInfo.cpp | 41 ----------- lib/Target/ARM/Thumb1RegisterInfo.h | 5 -- lib/Target/Hexagon/HexagonFrameLowering.cpp | 15 +++++ lib/Target/Hexagon/HexagonFrameLowering.h | 5 ++ lib/Target/Hexagon/HexagonRegisterInfo.cpp | 15 ----- lib/Target/Hexagon/HexagonRegisterInfo.h | 4 -- lib/Target/MBlaze/MBlazeFrameLowering.cpp | 39 +++++++++++ lib/Target/MBlaze/MBlazeFrameLowering.h | 4 ++ lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 38 ----------- lib/Target/MBlaze/MBlazeRegisterInfo.h | 4 -- lib/Target/MSP430/MSP430FrameLowering.cpp | 66 +++++++++++++++++- lib/Target/MSP430/MSP430FrameLowering.h | 4 ++ lib/Target/MSP430/MSP430RegisterInfo.cpp | 60 ----------------- lib/Target/MSP430/MSP430RegisterInfo.h | 4 -- lib/Target/Mips/Mips16FrameLowering.cpp | 19 ++++++ lib/Target/Mips/Mips16FrameLowering.h | 4 ++ lib/Target/Mips/Mips16RegisterInfo.cpp | 21 ------ lib/Target/Mips/Mips16RegisterInfo.h | 4 -- lib/Target/Mips/MipsSEFrameLowering.cpp | 20 ++++++ lib/Target/Mips/MipsSEFrameLowering.h | 4 ++ lib/Target/Mips/MipsSERegisterInfo.cpp | 22 ------ lib/Target/Mips/MipsSERegisterInfo.h | 4 -- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 11 +++ lib/Target/NVPTX/NVPTXFrameLowering.h | 4 ++ lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 9 --- lib/Target/NVPTX/NVPTXRegisterInfo.h | 4 -- lib/Target/PowerPC/PPCFrameLowering.cpp | 41 +++++++++++ lib/Target/PowerPC/PPCFrameLowering.h | 4 ++ lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 39 ----------- lib/Target/PowerPC/PPCRegisterInfo.h | 4 -- lib/Target/Sparc/SparcFrameLowering.cpp | 16 +++++ lib/Target/Sparc/SparcFrameLowering.h | 4 ++ lib/Target/Sparc/SparcRegisterInfo.cpp | 13 ---- lib/Target/Sparc/SparcRegisterInfo.h | 4 -- lib/Target/X86/X86FrameLowering.cpp | 85 ++++++++++++++++++++++- lib/Target/X86/X86FrameLowering.h | 4 ++ lib/Target/X86/X86RegisterInfo.cpp | 101 ---------------------------- lib/Target/X86/X86RegisterInfo.h | 4 -- lib/Target/XCore/XCoreFrameLowering.cpp | 52 
++++++++++++++ lib/Target/XCore/XCoreFrameLowering.h | 4 ++ lib/Target/XCore/XCoreRegisterInfo.cpp | 52 -------------- lib/Target/XCore/XCoreRegisterInfo.h | 4 -- 54 files changed, 543 insertions(+), 572 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 45e04a9..b18d52d 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -139,7 +139,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { /// variables for the function's frame information and eliminate call frame /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -186,7 +185,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. if (TFI->canSimplifyCallFramePseudos(Fn)) - RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -747,7 +746,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = prior(I); - TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 24d1576..cca6d12 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -644,3 +644,43 @@ AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // variable-sized objects that prevent reservation of a call frame. return !(hasFP(MF) && MFI->hasVarSizedObjects()); } + +void +AArch64FrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + const AArch64InstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + DebugLoc dl = MI->getDebugLoc(); + int Opcode = MI->getOpcode(); + bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; + + if (!hasReservedCallFrame(MF)) { + unsigned Align = getStackAlignment(); + + int64_t Amount = MI->getOperand(0).getImm(); + Amount = RoundUpToAlignment(Amount, Align); + if (!IsDestroy) Amount = -Amount; + + // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it + // doesn't have to pop anything), then the first operand will be zero too so + // this adjustment is a no-op. + if (CalleePopAmount == 0) { + // FIXME: in-function stack adjustment for calls is limited to 12-bits + // because there's no guaranteed temporary register available. Mostly call + // frames will be allocated at the start of a function so this is OK, but + // it is a limitation that needs dealing with. 
+ assert(Amount > -0xfff && Amount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); + } + } else if (CalleePopAmount != 0) { + // If the calling convention demands that the callee pops arguments from the + // stack, we want to add it back if we have a reserved call frame. + assert(CalleePopAmount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); + } + + MBB.erase(MI); +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index bca7b06..45ea0ec 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -71,6 +71,10 @@ public: const std::vector &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + /// If the register is X30 (i.e. LR) and the return address is used in the /// function then the callee-save store doesn't actually kill the register, /// otherwise it does. diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index ee34d76..20b0dcf 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -152,44 +152,6 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); } -void -AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - DebugLoc dl = MI->getDebugLoc(); - int Opcode = MI->getOpcode(); - bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; - - if (!TFI->hasReservedCallFrame(MF)) { - unsigned Align = TFI->getStackAlignment(); - - int64_t Amount = MI->getOperand(0).getImm(); - Amount = RoundUpToAlignment(Amount, Align); - if (!IsDestroy) Amount = -Amount; - - // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it - // doesn't have to pop anything), then the first operand will be zero too so - // this adjustment is a no-op. - if (CalleePopAmount == 0) { - // FIXME: in-function stack adjustment for calls is limited to 12-bits - // because there's no guaranteed temporary register available. Mostly call - // frames will be allocated at the start of a function so this is OK, but - // it is a limitation that needs dealing with. - assert(Amount > -0xfff && Amount < 0xfff && "call frame too large"); - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); - } - } else if (CalleePopAmount != 0) { - // If the calling convention demands that the callee pops arguments from the - // stack, we want to add it back if we have a reserved call frame. 
- assert(CalleePopAmount < 0xfff && "call frame too large"); - emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); - } - - MBB.erase(MI); -} - unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index a25f9d2..bb64fd5 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -44,10 +44,6 @@ public: unsigned FIOperandNum, RegScavenger *Rs = NULL) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; - /// getCrossCopyRegClass - Returns a legal register class to copy a register /// in the specified class to or from. Returns original class if it is /// possible to copy between a two registers of the specified class. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index db33d54..abdd251 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -401,64 +401,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const { return true; } -static void -emitSPUpdate(bool isARM, - MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); -} - - -void ARMBaseRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - ARMFunctionInfo *AFI = MF.getInfo(); - assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not support Thumb1!"); - bool isARM = !AFI->isThumbFunction(); - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - int PIdx = Old->findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); - emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg); - } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
- unsigned PredReg = Old->getOperand(3).getReg(); - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg); - } - } - } - MBB.erase(I); -} - int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { const MCInstrDesc &Desc = MI->getDesc(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 7fab9ff..725033b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -168,10 +168,6 @@ public: virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; - virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 39d27c4..0ca6450 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -119,13 +119,14 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - ARMCC::AL, 0, TII, MIFlags); + Pred, PredReg, TII, MIFlags); else emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - ARMCC::AL, 0, TII, MIFlags); + Pred, PredReg, TII, MIFlags); } void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -1430,3 +1431,51 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, AFI->setLRIsSpilledForFarJump(true); } } + + +void ARMFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const ARMBaseInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + ARMFunctionInfo *AFI = MF.getInfo(); + assert(!AFI->isThumb1OnlyFunction() && + "This eliminateCallFramePseudoInstr does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + int PIdx = Old->findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. + unsigned PredReg = Old->getOperand(2).getReg(); + emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, + Pred, PredReg); + } else { + // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
+ unsigned PredReg = Old->getOperand(3).getReg(); + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, + Pred, PredReg); + } + } + } + MBB.erase(I); +} + diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index a1c2b93..efa255a 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -70,6 +70,11 @@ public: unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; + + virtual void eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 98efc16..2c3388c 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -43,6 +43,41 @@ emitSPUpdate(MachineBasicBlock &MBB, MRI, MIFlags); } + +void Thumb1FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const Thumb1InstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + const Thumb1RegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... 
+ unsigned Opc = Old->getOpcode(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); + } else { + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); + } + } + } + MBB.erase(I); +} + void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index bcfc516..5a300af 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -45,6 +45,10 @@ public: const TargetRegisterInfo *TRI) const; bool hasReservedCallFrame(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 5442aa7..609d502 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -296,47 +296,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } } -static void emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, - int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, - MRI); -} - -void Thumb1RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - emitSPUpdate(MBB, I, TII, dl, *this, -Amount); - } else { - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, TII, dl, *this, Amount); - } - } - } - MBB.erase(I); -} - /// emitThumbConstant - Emit a series of instructions to materialize a /// constant. static void emitThumbConstant(MachineBasicBlock &MBB, diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 6232551..ebbab36 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -43,11 +43,6 @@ public: unsigned PredReg = 0, unsigned MIFlags = MachineInstr::NoFlags) const; - /// Code Generation virtual methods... - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be // however much remains to be handled. Return 'true' if no further // work is required. 
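
A pattern repeated in nearly every target's new eliminateCallFramePseudoInstr in this patch is rounding the frame-adjustment amount up to the stack alignment before emitting the SP add/sub. A standalone sketch of just that arithmetic, assuming, as the targets do, a power-of-two alignment:

    #include <cassert>
    #include <cstdint>

    // Same formula as the (Amount+Align-1)/Align*Align expression in the ARM,
    // Thumb1, MBlaze, and MSP430 hunks; LLVM's RoundUpToAlignment is the
    // library form of it (see the AArch64 hunk).
    uint64_t roundUpToAlignment(uint64_t Amount, uint64_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
      return (Amount + Align - 1) / Align * Align;
    }

    int main() {
      assert(roundUpToAlignment(0, 8) == 0);
      assert(roundUpToAlignment(1, 8) == 8);   // one outgoing byte -> 8
      assert(roundUpToAlignment(20, 8) == 24); // 20 bytes of args -> 24
      return 0;
    }

ADJCALLSTACKDOWN then subtracts the rounded amount from SP and ADJCALLSTACKUP adds it back, which is why several of these hunks negate Amount for the DOWN case.
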
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 9043cf9..d6a9329 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -327,6 +327,21 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters( return true; } +void HexagonFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + + if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { + // Hexagon_TODO: add code + } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { + // Hexagon_TODO: add code + } else { + llvm_unreachable("Cannot handle this call frame pseudo instruction"); + } + MBB.erase(I); +} + int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { return MF.getFrameInfo()->getObjectOffset(FI); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index ad87f11..a62c76a 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -35,6 +35,11 @@ public: MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 7929610..e558234 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -117,21 +117,6 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { "architecture version"); } -void HexagonRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - - if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { - // Hexagon_TODO: add code - } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { - // Hexagon_TODO: add code - } else { - llvm_unreachable("Cannot handle this call frame pseudo instruction"); - } - MBB.erase(I); -} - void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 58c374e..a1a438a 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -56,10 +56,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index b6edbba..172304b 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -426,6 +426,45 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF, } } +// Eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions +void MBlazeFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { 
+ const MBlazeInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have a frame pointer, turn the adjcallstackup instruction into a + // 'addi r1, r1, -' and the adjcallstackdown instruction into + // 'addi r1, r1, ' + MachineInstr *Old = I; + int Amount = Old->getOperand(0).getImm() + 4; + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + MachineInstr *New; + if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) { + New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1) + .addReg(MBlaze::R1).addImm(-Amount); + } else { + assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP); + New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1) + .addReg(MBlaze::R1).addImm(Amount); + } + + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + + void MBlazeFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h index 01e6578..f4228c5 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.h +++ b/lib/Target/MBlaze/MBlazeFrameLowering.h @@ -39,6 +39,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool hasFP(const MachineFunction &MF) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 1682db1..d0fd7dc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -83,44 +83,6 @@ getReservedRegs(const MachineFunction &MF) const { return Reserved; } -// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions -void MBlazeRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // If we have a frame pointer, turn the adjcallstackup instruction into a - // 'addi r1, r1, -' and the adjcallstackdown instruction into - // 'addi r1, r1, ' - MachineInstr *Old = I; - int Amount = Old->getOperand(0).getImm() + 4; - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - MachineInstr *New; - if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) { - New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1) - .addReg(MBlaze::R1).addImm(-Amount); - } else { - assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP); - New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1) - .addReg(MBlaze::R1).addImm(Amount); - } - - // Replace the pseudo instruction with a new instruction... 
- MBB.insert(I, New); - } - } - - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 99d2e4b..99a2fac 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -50,10 +50,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index aef45d8..ae2e556 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -222,13 +222,73 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +void MSP430FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MSP430InstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + unsigned StackAlign = getStackAlignment(); + + if (!hasReservedCallFrame(MF)) { + // If the stack pointer can be changed after prologue, turn the + // adjcallstackup instruction into a 'sub SPW, ' and the + // adjcallstackdown instruction into 'add SPW, ' + // TODO: consider using push / pop instead of sub + store / add + MachineInstr *Old = I; + uint64_t Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; + + MachineInstr *New = 0; + if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { + New = BuildMI(MF, Old->getDebugLoc(), + TII.get(MSP430::SUB16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(Amount); + } else { + assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); + // factor out the amount the callee already popped. + uint64_t CalleeAmt = Old->getOperand(1).getImm(); + Amount -= CalleeAmt; + if (Amount) + New = BuildMI(MF, Old->getDebugLoc(), + TII.get(MSP430::ADD16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(Amount); + } + + if (New) { + // The SRW implicit def is dead. + New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. + if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { + MachineInstr *Old = I; + MachineInstr *New = + BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), + MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt); + // The SRW implicit def is dead. + New->getOperand(3).setIsDead(); + + MBB.insert(I, New); + } + } + + MBB.erase(I); +} + void MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // Create a frame entry for the FPW register that must be saved. 
- if (TFI->hasFP(MF)) { + if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); (void)FrameIdx; assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index cb02545..a077dd7 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -35,6 +35,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 6e0a223..0b3e9e2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -101,66 +101,6 @@ MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) return &MSP430::GR16RegClass; } -void MSP430RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // If the stack pointer can be changed after prologue, turn the - // adjcallstackup instruction into a 'sub SPW, ' and the - // adjcallstackdown instruction into 'add SPW, ' - // TODO: consider using push / pop instead of sub + store / add - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; - - MachineInstr *New = 0; - if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { - New = BuildMI(MF, Old->getDebugLoc(), - TII.get(MSP430::SUB16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(Amount); - } else { - assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); - // factor out the amount the callee already popped. - uint64_t CalleeAmt = Old->getOperand(1).getImm(); - Amount -= CalleeAmt; - if (Amount) - New = BuildMI(MF, Old->getDebugLoc(), - TII.get(MSP430::ADD16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(Amount); - } - - if (New) { - // The SRW implicit def is dead. - New->getOperand(3).setIsDead(); - - // Replace the pseudo instruction with a new instruction... - MBB.insert(I, New); - } - } - } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. - if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - MachineInstr *Old = I; - MachineInstr *New = - BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), - MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt); - // The SRW implicit def is dead. 
- New->getOperand(3).setIsDead(); - - MBB.insert(I, New); - } - } - - MBB.erase(I); -} - void MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index fca903a..69cccb2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -42,10 +42,6 @@ public: const TargetRegisterClass* getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 127fcf2..1bb6fe4 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -139,6 +139,25 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void Mips16FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const Mips16InstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + + TII.adjustStackPtr(Mips::SP, Amount, MBB, I); + } + + MBB.erase(I); +} + bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 01db71e..25f4ffb 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -27,6 +27,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index a181a34..0ea9368 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -72,27 +72,6 @@ bool Mips16RegisterInfo::saveScavengerRegister return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void Mips16RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - const Mips16InstrInfo *II = static_cast(&TII); - - II->adjustStackPtr(Mips::SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 6101739..b8f818a 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ 
b/lib/Target/Mips/Mips16RegisterInfo.h @@ -25,10 +25,6 @@ public: Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII); - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - bool requiresRegisterScavenging(const MachineFunction &MF) const; bool requiresFrameIndexScavenging(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index e5ecf2c..0dd6713 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -249,6 +249,26 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); } +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void MipsSEFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MipsSEInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + TII.adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} + void MipsSEFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 9b04ee9..7becd25 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -28,6 +28,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 0c0a1a3..a39b393 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -54,28 +54,6 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void MipsSERegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - const MipsSEInstrInfo *II = static_cast(&TII); - unsigned SP = Subtarget.isABI_N64() ? 
Mips::SP_64 : Mips::SP; - - II->adjustStackPtr(SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index 7437bd3..f6827e9 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -31,10 +31,6 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 50072c5..bb2c55c 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -74,3 +74,14 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { } + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void NVPTXFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, + // ADJCALLSTACKUP instructions. + MBB.erase(I); +} + diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index ee87b39..d34e7be 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -33,6 +33,10 @@ public: virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; }; } // End llvm namespace diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 8e105b5..350a2c5 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -150,12 +150,3 @@ unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } -// This function eliminates ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void NVPTXRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, - // ADJCALLSTACKUP instructions. 
- MBB.erase(I); -} diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 56e6289..69f73f2 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -58,10 +58,6 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS=NULL) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const; virtual unsigned getRARegister() const; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 9948d61..cc1ed69 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1124,6 +1124,47 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, .addReg(MoveReg)); } +void PPCFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const PPCInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { + // Add (actually subtract) back the amount the callee popped on return. + if (int CalleeAmt = I->getOperand(1).getImm()) { + bool is64Bit = Subtarget.isPPC64(); + CalleeAmt *= -1; + unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; + unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + + if (isInt<16>(CalleeAmt)) { + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addImm(CalleeAmt); + } else { + MachineBasicBlock::iterator MBBI = I; + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CalleeAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CalleeAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addReg(TmpReg); + } + } + } + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index b1d63ab..d09e47f 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -51,6 +51,10 @@ public: const std::vector &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 60f6ea0..cf1f459 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3332,7 +3332,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // When performing tail call optimization the callee pops its arguments off // the stack. 
Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. + // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index eca7f12..9745235 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -222,45 +222,6 @@ PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -void PPCRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (MF.getTarget().Options.GuaranteedTailCallOpt && - I->getOpcode() == PPC::ADJCALLSTACKUP) { - // Add (actually subtract) back the amount the callee popped on return. - if (int CalleeAmt = I->getOperand(1).getImm()) { - bool is64Bit = Subtarget.isPPC64(); - CalleeAmt *= -1; - unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; - unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); - - if (isInt<16>(CalleeAmt)) { - BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addImm(CalleeAmt); - } else { - MachineBasicBlock::iterator MBBI = I; - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) - .addImm(CalleeAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) - .addImm(CalleeAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addReg(TmpReg); - } - } - } - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - /// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered /// register first and then a spilled callee-saved register if that fails. 
static diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 3e07a01..c22450e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -56,10 +56,6 @@ public: bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 6c47c70..a0dae6e 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -67,6 +67,22 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { } } +void SparcFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + DebugLoc dl = MI.getDebugLoc(); + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) + Size = -Size; + const SparcInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + if (Size) + BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); + MBB.erase(I); +} + + void SparcFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 6b593c9..464233e 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -32,6 +32,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool hasFP(const MachineFunction &MF) const { return false; } }; diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index ac1a350..25e90b7 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -56,19 +56,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -void SparcRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - DebugLoc dl = MI.getDebugLoc(); - int Size = MI.getOperand(0).getImm(); - if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) - Size = -Size; - if (Size) - BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); - MBB.erase(I); -} - void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 37bb4d5..357879b 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -36,10 +36,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; diff --git 
a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 2b3d853..a05cf5c 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -55,8 +55,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MMI.callsUnwindInit() || MMI.callsEHReturn()); } -static unsigned getSUBriOpcode(unsigned isLP64, int64_t Imm) { - if (isLP64) { +static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { if (isInt<8>(Imm)) return X86::SUB64ri8; return X86::SUB64ri32; @@ -1756,3 +1756,84 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { MF.verify(); #endif } + +void X86FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo &RegInfo = *TM.getRegisterInfo(); + unsigned StackPtr = RegInfo.getStackRegister(); + bool reseveCallFrame = hasReservedCallFrame(MF); + int Opcode = I->getOpcode(); + bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + bool IsLP64 = STI.isTarget64BitLP64(); + DebugLoc DL = I->getDebugLoc(); + uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; + uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; + I = MBB.erase(I); + + if (!reseveCallFrame) { + // If the stack pointer can be changed after prologue, turn the + // adjcallstackup instruction into a 'sub ESP, ' and the + // adjcallstackdown instruction into 'add ESP, ' + // TODO: consider using push / pop instead of sub + store / add + if (Amount == 0) + return; + + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + + MachineInstr *New = 0; + if (Opcode == TII.getCallFrameSetupOpcode()) { + New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), + StackPtr) + .addReg(StackPtr) + .addImm(Amount); + } else { + assert(Opcode == TII.getCallFrameDestroyOpcode()); + + // Factor out the amount the callee already popped. + Amount -= CalleeAmt; + + if (Amount) { + unsigned Opc = getADDriOpcode(IsLP64, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); + } + } + + if (New) { + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction. + MBB.insert(I, New); + } + + return; + } + + if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. + unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); + MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(CalleeAmt); + + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + // We are not tracking the stack pointer adjustment by the callee, so make + // sure we restore the stack pointer immediately after the call, there may + // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 
+ MachineBasicBlock::iterator B = MBB.begin(); + while (I != B && !llvm::prior(I)->isCall()) + --I; + MBB.insert(I, New); + } +} + diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index c35d952..3f08b9a 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -65,6 +65,10 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; uint32_t getCompactUnwindEncoding(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 6b0a915..03f412f 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -447,107 +447,6 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } -static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { - if (isInt<8>(Imm)) - return X86::SUB64ri8; - return X86::SUB64ri32; - } else { - if (isInt<8>(Imm)) - return X86::SUB32ri8; - return X86::SUB32ri; - } -} - -static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { - if (isInt<8>(Imm)) - return X86::ADD64ri8; - return X86::ADD64ri32; - } else { - if (isInt<8>(Imm)) - return X86::ADD32ri8; - return X86::ADD32ri; - } -} - -void X86RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - bool reseveCallFrame = TFI->hasReservedCallFrame(MF); - int Opcode = I->getOpcode(); - bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; - uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; - I = MBB.erase(I); - - if (!reseveCallFrame) { - // If the stack pointer can be changed after prologue, turn the - // adjcallstackup instruction into a 'sub ESP, ' and the - // adjcallstackdown instruction into 'add ESP, ' - // TODO: consider using push / pop instead of sub + store / add - if (Amount == 0) - return; - - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = 0; - if (Opcode == TII.getCallFrameSetupOpcode()) { - New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), - StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } else { - assert(Opcode == TII.getCallFrameDestroyOpcode()); - - // Factor out the amount the callee already popped. - Amount -= CalleeAmt; - - if (Amount) { - unsigned Opc = getADDriOpcode(Is64Bit, Amount); - New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); - } - } - - if (New) { - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // Replace the pseudo instruction with a new instruction. - MBB.insert(I, New); - } - - return; - } - - if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. We do this until we have - // more advanced stack pointer tracking ability. 
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); - MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(CalleeAmt); - - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // We are not tracking the stack pointer adjustment by the callee, so make - // sure we restore the stack pointer immediately after the call, there may - // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. - MachineBasicBlock::iterator B = MBB.begin(); - while (I != B && !llvm::prior(I)->isCall()) - --I; - MBB.insert(I, New); - } -} - void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 5b45e9e..b9d7b8c 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -117,10 +117,6 @@ public: bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index bb9c77a..019c457 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -332,6 +332,58 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void XCoreFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const XCoreInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // Turn the adjcallstackdown instruction into 'extsp ' and the + // adjcallstackup instruction into 'ldaw sp, sp[]' + MachineInstr *Old = I; + uint64_t Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + assert(Amount%4 == 0); + Amount /= 4; + + bool isU6 = isImmU6(Amount); + if (!isU6 && !isImmU16(Amount)) { + // FIX could emit multiple instructions in this case. +#ifndef NDEBUG + errs() << "eliminateCallFramePseudoInstr size too big: " + << Amount << "\n"; +#endif + llvm_unreachable(0); + } + + MachineInstr *New; + if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) { + int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; + New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode)) + .addImm(Amount); + } else { + assert(Old->getOpcode() == XCore::ADJCALLSTACKUP); + int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; + New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP) + .addImm(Amount); + } + + // Replace the pseudo instruction with a new instruction... 
+ MBB.insert(I, New); + } + } + + MBB.erase(I); +} + void XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index db1bbb6..ebad62f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -39,6 +39,10 @@ namespace llvm { const std::vector &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool hasFP(const MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 01749a8..49b5634 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -101,58 +101,6 @@ XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { return false; } -// This function eliminates ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void XCoreRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // Turn the adjcallstackdown instruction into 'extsp ' and the - // adjcallstackup instruction into 'ldaw sp, sp[]' - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - assert(Amount%4 == 0); - Amount /= 4; - - bool isU6 = isImmU6(Amount); - if (!isU6 && !isImmU16(Amount)) { - // FIX could emit multiple instructions in this case. -#ifndef NDEBUG - errs() << "eliminateCallFramePseudoInstr size too big: " - << Amount << "\n"; -#endif - llvm_unreachable(0); - } - - MachineInstr *New; - if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) { - int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; - New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode)) - .addImm(Amount); - } else { - assert(Old->getOpcode() == XCore::ADJCALLSTACKUP); - int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; - New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP) - .addImm(Amount); - } - - // Replace the pseudo instruction with a new instruction... - MBB.insert(I, New); - } - } - - MBB.erase(I); -} - void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 62549a8..1db3248 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -54,10 +54,6 @@ public: bool useFPForScavengingIndex(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; -- cgit v1.1 From 5e5529c74c6bc5f46e272658835dbb5d715b57ca Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 21 Feb 2013 21:40:51 +0000 Subject: Remove dead code and whitespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175804 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/SCCP.cpp | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 837ed9b..e30a274 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -271,13 +271,6 @@ public:
     return I->second;
   }
 
-  /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
-    DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
-      StructValueState.find(std::make_pair(V, i));
-    assert(I != StructValueState.end() && "V is not in valuemap!");
-    return I->second;
-  }*/
-
   /// getTrackedRetVals - Get the inferred return value map.
   ///
   const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
@@ -710,9 +703,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
   markConstant(&PN, OperandVal);   // Acquire operand value
 }
 
-
-
-
 void SCCPSolver::visitReturnInst(ReturnInst &I) {
   if (I.getNumOperands() == 0) return;  // ret void
-- cgit v1.1

From b489e29976afed1a015eecd00c5726fe565b038c Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Thu, 21 Feb 2013 22:16:43 +0000
Subject: Re-apply r175688, with the changes suggested by Jakob in PR15320.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175809 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/ExpandPostRAPseudos.cpp | 39 ++++++++++++++-----------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 0b9e83d..1611db8 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -49,8 +49,6 @@ private:
   bool LowerSubregToReg(MachineInstr *MI);
   bool LowerCopy(MachineInstr *MI);
 
-  void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
-                        const TargetRegisterInfo *TRI);
   void TransferImplicitDefs(MachineInstr *MI);
 };
 } // end anonymous namespace
@@ -61,21 +59,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
 INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
                 "Post-RA pseudo instruction expansion pass", false, false)
 
-/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
-/// and the lowered replacement instructions immediately precede it.
-/// Mark the replacement instructions with the dead flag.
-void
-ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
-                               const TargetRegisterInfo *TRI) {
-  for (MachineBasicBlock::iterator MII =
-        prior(MachineBasicBlock::iterator(MI)); ; --MII) {
-    if (MII->addRegisterDead(DstReg, TRI))
-      break;
-    assert(MII != MI->getParent()->begin() &&
-           "copyPhysReg output doesn't reference destination register!");
-  }
-}
-
 /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
 /// replacement instructions immediately precede it. Copy any implicit-def
 /// operands from MI to the replacement instruction.
@@ -114,6 +97,12 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
 
   DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
+  if (MI->allDefsAreDead()) {
+    MI->setDesc(TII->get(TargetOpcode::KILL));
+    DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+    return true;
+  }
+
   if (DstSubReg == InsReg) {
     // No need to insert an identify copy instruction.
     // Watch out for case like this:
@@ -135,10 +124,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
     MachineBasicBlock::iterator CopyMI = MI;
     --CopyMI;
     CopyMI->addRegisterDefined(DstReg);
-
-    // Transfer the kill/dead flags, if needed.
-    if (MI->getOperand(0).isDead())
-      TransferDeadFlag(MI, DstSubReg, TRI);
     DEBUG(dbgs() << "subreg: " << *CopyMI);
   }
 
@@ -148,6 +133,14 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
 }
 
 bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+  if (MI->allDefsAreDead()) {
+    DEBUG(dbgs() << "dead copy: " << *MI);
+    MI->setDesc(TII->get(TargetOpcode::KILL));
+    DEBUG(dbgs() << "replaced by: " << *MI);
+    return true;
+  }
+
   MachineOperand &DstMO = MI->getOperand(0);
   MachineOperand &SrcMO = MI->getOperand(1);
 
@@ -155,7 +148,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
     DEBUG(dbgs() << "identity copy: " << *MI);
     // No need to insert an identity copy instruction, but replace with a KILL
    // if liveness is changed.
-    if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+    if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
      // We must make sure the super-register gets killed. Replace the
      // instruction with KILL.
      MI->setDesc(TII->get(TargetOpcode::KILL));
@@ -171,8 +164,6 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
   TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
                    DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
 
-  if (DstMO.isDead())
-    TransferDeadFlag(MI, DstMO.getReg(), TRI);
   if (MI->getNumOperands() > 2)
     TransferImplicitDefs(MI);
   DEBUG({
-- cgit v1.1

From e18bce5317ff9f64b3c02418f28c6d383d88b294 Mon Sep 17 00:00:00 2001
From: Renato Golin
Date: Thu, 21 Feb 2013 22:39:03 +0000
Subject: Allow GlobalValues to vectorize with AliasAnalysis

Storing the load/store instructions with the values and inspecting them using
Alias Analysis to make sure they don't alias, since the GEP pointer operand
doesn't take the offset into account.

Trying hard to not add any extra cost to loads and stores that don't overlap
on global values. AA is *only* calculated if all of the previous attempts
failed.

Using the biggest vector register size as the stride for the vectorization
access, as we're being conservative and the cost model (which calculates the
real vectorization factor) is only run after the legalization phase.

We might re-think this relationship in the future, but for now, I'd rather be
safe than sorry.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175818 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 189 +++++++++++++++++++++++------
 1 file changed, 154 insertions(+), 35 deletions(-)

(limited to 'lib')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index ab1068d..f489393 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -319,8 +319,9 @@ private:
 class LoopVectorizationLegality {
 public:
   LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
-                            DominatorTree *DT)
-    : TheLoop(L), SE(SE), DL(DL), DT(DT), Induction(0) {}
+                            DominatorTree *DT, TargetTransformInfo* TTI,
+                            AliasAnalysis* AA)
+    : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {}
 
   /// This enum represents the kinds of reductions that we support.
   enum ReductionKind {
@@ -404,6 +405,11 @@ public:
   /// induction descriptor.
   typedef MapVector<PHINode*, InductionInfo> InductionList;
 
+  /// Alias(Multi)Map stores the values (GEPs or underlying objects and their
+  /// respective Store/Load instruction(s) to calculate aliasing.
+  typedef DenseMap<Value*, Instruction*> AliasMap;
+  typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
+
   /// Returns true if it is legal to vectorize this loop.
   /// This does not mean that it is profitable to vectorize this
   /// loop, only that it is legal to do so.
@@ -477,6 +483,14 @@ private:
   InductionKind isInductionVariable(PHINode *Phi);
   /// Return true if can compute the address bounds of Ptr within the loop.
   bool hasComputableBounds(Value *Ptr);
+  /// Return true if there is the chance of write reorder.
+  bool hasPossibleGlobalWriteReorder(Value *Object,
+                                     Instruction *Inst,
+                                     AliasMultiMap &WriteObjects,
+                                     unsigned MaxByteWidth);
+  /// Return the AA location for a load or a store.
+  AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
+
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -484,8 +498,12 @@ private:
   ScalarEvolution *SE;
   /// DataLayout analysis.
   DataLayout *DL;
-  // Dominators.
+  /// Dominators.
   DominatorTree *DT;
+  /// Target Info.
+  TargetTransformInfo *TTI;
+  /// Alias Analysis.
+  AliasAnalysis *AA;
 
   // ---  vectorization state --- //
 
@@ -612,6 +630,7 @@ struct LoopVectorize : public LoopPass {
   LoopInfo *LI;
   TargetTransformInfo *TTI;
   DominatorTree *DT;
+  AliasAnalysis *AA;
 
   virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
     // We only vectorize innermost loops.
@@ -623,12 +642,13 @@ struct LoopVectorize : public LoopPass {
     LI = &getAnalysis<LoopInfo>();
     TTI = &getAnalysis<TargetTransformInfo>();
    DT = &getAnalysis<DominatorTree>();
+    AA = getAnalysisIfAvailable<AliasAnalysis>();
 
     DEBUG(dbgs() << "LV: Checking a loop in \"" <<
           L->getHeader()->getParent()->getName() << "\"\n");
 
     // Check if it is legal to vectorize the loop.
-    LoopVectorizationLegality LVL(L, SE, DL, DT);
+    LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA);
     if (!LVL.canVectorize()) {
       DEBUG(dbgs() << "LV: Not vectorizing.\n");
       return false;
@@ -2275,6 +2295,42 @@ void LoopVectorizationLegality::collectLoopUniforms() {
   }
 }
 
+AliasAnalysis::Location
+LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
+  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+    return AA->getLocation(Store);
+  else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+    return AA->getLocation(Load);
+
+  llvm_unreachable("Should be either load or store instruction");
+}
+
+bool
+LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
+                                            Value *Object,
+                                            Instruction *Inst,
+                                            AliasMultiMap& WriteObjects,
+                                            unsigned MaxByteWidth) {
+
+  AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst);
+
+  std::vector<Instruction*>::iterator
+              it = WriteObjects[Object].begin(),
+              end = WriteObjects[Object].end();
+
+  for (; it != end; ++it) {
+    Instruction* I = *it;
+    if (I == Inst)
+      continue;
+
+    AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I);
+    if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth),
+                  ThatLoc.getWithNewSize(MaxByteWidth)))
+      return true;
+  }
+  return false;
+}
+
 bool LoopVectorizationLegality::canVectorizeMemory() {
 
   if (TheLoop->isAnnotatedParallel()) {
@@ -2337,9 +2393,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     return true;
   }
 
-  // Holds the read and read-write *pointers* that we find.
-  ValueVector Reads;
-  ValueVector ReadWrites;
+  // Holds the read and read-write *pointers* that we find. These maps hold
+  // unique values for pointers (so no need for multi-map).
+  AliasMap Reads;
+  AliasMap ReadWrites;
 
   // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
   // multiple times on the same object. If the ptr is accessed twice, once
@@ -2361,7 +2418,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
       // If we did *not* see this pointer before, insert it to
      // the read-write list. At this phase it is only a 'write' list.
      if (Seen.insert(Ptr))
-        ReadWrites.push_back(Ptr);
+        ReadWrites.insert(std::make_pair(Ptr, ST));
   }
 
   for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
@@ -2376,7 +2433,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     // read a few words, modify, and write a few words, and some of the
     // words may be written to the same address.
     if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
-      Reads.push_back(Ptr);
+      Reads.insert(std::make_pair(Ptr, LD));
   }
 
   // If we write (or read-write) to a single destination and there are no
@@ -2389,22 +2446,27 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
-  for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
-    if (hasComputableBounds(*I)) {
-      PtrRtCheck.insert(SE, TheLoop, *I);
-      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+  AliasMap::iterator MI, ME;
+  for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+    Value *V = (*MI).first;
+    if (hasComputableBounds(V)) {
+      PtrRtCheck.insert(SE, TheLoop, V);
+      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
     } else {
       CanDoRT = false;
       break;
     }
-  for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
-    if (hasComputableBounds(*I)) {
-      PtrRtCheck.insert(SE, TheLoop, *I);
-      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+  }
+  for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+    Value *V = (*MI).first;
+    if (hasComputableBounds(V)) {
+      PtrRtCheck.insert(SE, TheLoop, V);
+      DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
     } else {
       CanDoRT = false;
       break;
     }
+  }
 
   // Check that we did not collect too many pointers or found a
   // unsizeable pointer.
@@ -2419,47 +2481,104 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
 
   bool NeedRTCheck = false;
 
+  // Biggest vectorized access possible, vector width * unroll factor.
+  // TODO: We're being very pessimistic here, find a way to know the
+  // real access width before getting here.
+  unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
+                          TTI->getMaximumUnrollFactor();
   // Now that the pointers are in two lists (Reads and ReadWrites), we
   // can check that there are no conflicts between each of the writes and
   // between the writes to the reads.
-  ValueSet WriteObjects;
+  // Note that WriteObjects duplicates the stores (indexed now by underlying
+  // objects) to avoid pointing to elements inside ReadWrites.
+  // TODO: Maybe create a new type where they can interact without duplication.
+  AliasMultiMap WriteObjects;
   ValueVector TempObjects;
 
   // Check that the read-writes do not conflict with other read-write
  // pointers.
  bool AllWritesIdentified = true;
-  for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) {
-    GetUnderlyingObjects(*I, TempObjects, DL);
-    for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
-         it != e; ++it) {
-      if (!isIdentifiedObject(*it)) {
-        DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
+  for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+    Value *Val = (*MI).first;
+    Instruction *Inst = (*MI).second;
+
+    GetUnderlyingObjects(Val, TempObjects, DL);
+    for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+         UI != UE; ++UI) {
+      if (!isIdentifiedObject(*UI)) {
+        DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
         NeedRTCheck = true;
         AllWritesIdentified = false;
       }
-      if (!WriteObjects.insert(*it)) {
+
+      // Never seen it before, can't alias.
+      if (WriteObjects[*UI].empty()) {
+        DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
+        WriteObjects[*UI].push_back(Inst);
+        continue;
+      }
+      // Direct alias found.
+      if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+        DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+                     << **UI <<"\n");
+        return false;
+      }
+      DEBUG(dbgs() << "LV: Found a conflicting global value:"
+                   << **UI <<"\n");
+      DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
+      DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+      // If global alias, make sure they do alias.
+      if (hasPossibleGlobalWriteReorder(*UI,
+                                        Inst,
+                                        WriteObjects,
+                                        MaxByteWidth)) {
         DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
-              << **it <<"\n");
+              << *UI <<"\n");
         return false;
       }
+
+      // Didn't alias, insert into map for further reference.
+      WriteObjects[*UI].push_back(Inst);
     }
     TempObjects.clear();
   }
 
   /// Check that the reads don't conflict with the read-writes.
-  for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) {
-    GetUnderlyingObjects(*I, TempObjects, DL);
-    for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
-         it != e; ++it) {
+  for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+    Value *Val = (*MI).first;
+    GetUnderlyingObjects(Val, TempObjects, DL);
+    for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+         UI != UE; ++UI) {
       // If all of the writes are identified then we don't care if the read
       // pointer is identified or not.
-      if (!AllWritesIdentified && !isIdentifiedObject(*it)) {
-        DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
+      if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
+        DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
         NeedRTCheck = true;
       }
-      if (WriteObjects.count(*it)) {
-        DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
-              << **it <<"\n");
+
+      // Never seen it before, can't alias.
+      if (WriteObjects[*UI].empty())
+        continue;
+      // Direct alias found.
+      if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+        DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+                     << **UI <<"\n");
+        return false;
+      }
+      DEBUG(dbgs() << "LV: Found a global value: "
+                   << **UI <<"\n");
+      Instruction *Inst = (*MI).second;
+      DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
+      DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+      // If global alias, make sure they do alias.
+      if (hasPossibleGlobalWriteReorder(*UI,
+                                        Inst,
+                                        WriteObjects,
+                                        MaxByteWidth)) {
+        DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
+                     << *UI <<"\n");
         return false;
       }
     }
-- cgit v1.1

From 7bf3d6a0438485df61c438f26cfbaef2f8d8a3c4 Mon Sep 17 00:00:00 2001
From: Eli Bendersky
Date: Thu, 21 Feb 2013 22:53:19 +0000
Subject: Previously, parsing capability of the .debug_frame section was added
 to lib/DebugInfo, with dumping in llvm-dwarfdump. This patch adds initial
 ability to parse and dump CFA instructions contained in entries.

To keep it manageable, the patch omits some more advanced capabilities
(accounted in TODOs):

* Parsing of instructions with BLOCK arguments (expression lists)
* Dumping of actual instruction arguments (currently only names are dumped).
  This is quite tricky since the dumper has to effectively "interpret" the
  instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175820 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/DebugInfo/DWARFDebugFrame.cpp | 176 ++++++++++++++++++++++++++++++++++++--
 lib/Support/Dwarf.cpp             |   1 +
 2 files changed, 171 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp
index 69e3a3a..951c7a0 100644
--- a/lib/DebugInfo/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARFDebugFrame.cpp
@@ -10,8 +10,12 @@
 #include "DWARFDebugFrame.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <vector>
 
 using namespace llvm;
 using namespace dwarf;
@@ -29,9 +33,21 @@ public:
   }
 
   FrameKind getKind() const { return Kind; }
+  virtual uint64_t getOffset() const { return Offset; }
 
+  /// \brief Parse and store a sequence of CFI instructions from our data
+  /// stream, starting at Offset and ending at EndOffset. If everything
+  /// goes well, Offset should be equal to EndOffset when this method
+  /// returns. Otherwise, an error occurred.
+  /// TODO: Improve error reporting...
+  virtual void parseInstructions(uint32_t &Offset, uint32_t EndOffset);
+
+  /// \brief Dump the entry header to the given output stream.
   virtual void dumpHeader(raw_ostream &OS) const = 0;
 
+  /// \brief Dump the entry's instructions to the given output stream.
+  virtual void dumpInstructions(raw_ostream &OS) const;
+
 protected:
   const FrameKind Kind;
@@ -44,8 +60,143 @@ protected:
 
   /// \brief Entry length as specified in DWARF.
   uint64_t Length;
+
+  /// An entry may contain CFI instructions. An instruction consists of an
+  /// opcode and an optional sequence of operands.
+  typedef std::vector<uint64_t> Operands;
+  struct Instruction {
+    Instruction(uint8_t Opcode)
+      : Opcode(Opcode)
+    {}
+
+    uint8_t Opcode;
+    Operands Ops;
+  };
+
+  std::vector<Instruction> Instructions;
+
+  /// Convenience methods to add a new instruction with the given opcode and
+  /// operands to the Instructions vector.
+ void addInstruction(uint8_t Opcode) { + Instructions.push_back(Instruction(Opcode)); + } + + void addInstruction(uint8_t Opcode, uint64_t Operand1) { + Instructions.push_back(Instruction(Opcode)); + Instructions.back().Ops.push_back(Operand1); + } + + void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) { + Instructions.push_back(Instruction(Opcode)); + Instructions.back().Ops.push_back(Operand1); + Instructions.back().Ops.push_back(Operand2); + } }; + +// See DWARF standard v3, section 7.23 +const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; +const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; + + +void FrameEntry::parseInstructions(uint32_t &Offset, uint32_t EndOffset) { + while (Offset < EndOffset) { + uint8_t Opcode = Data.getU8(&Offset); + // Some instructions have a primary opcode encoded in the top bits. + uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; + + if (Primary) { + // If it's a primary opcode, the first operand is encoded in the bottom + // bits of the opcode itself. + uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; + switch (Primary) { + default: llvm_unreachable("Impossible primary CFI opcode"); + case DW_CFA_advance_loc: + case DW_CFA_restore: + addInstruction(Primary, Op1); + break; + case DW_CFA_offset: + addInstruction(Primary, Op1, Data.getULEB128(&Offset)); + break; + } + } else { + // Extended opcode - its value is Opcode itself. + switch (Opcode) { + default: llvm_unreachable("Invalid extended CFI opcode"); + case DW_CFA_nop: + case DW_CFA_remember_state: + case DW_CFA_restore_state: + // No operands + addInstruction(Opcode); + break; + case DW_CFA_set_loc: + // Operands: Address + addInstruction(Opcode, Data.getAddress(&Offset)); + break; + case DW_CFA_advance_loc1: + // Operands: 1-byte delta + addInstruction(Opcode, Data.getU8(&Offset)); + break; + case DW_CFA_advance_loc2: + // Operands: 2-byte delta + addInstruction(Opcode, Data.getU16(&Offset)); + break; + case DW_CFA_advance_loc4: + // Operands: 4-byte delta + addInstruction(Opcode, Data.getU32(&Offset)); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + case DW_CFA_def_cfa_register: + case DW_CFA_def_cfa_offset: + // Operands: ULEB128 + addInstruction(Opcode, Data.getULEB128(&Offset)); + break; + case DW_CFA_def_cfa_offset_sf: + // Operands: SLEB128 + addInstruction(Opcode, Data.getSLEB128(&Offset)); + break; + case DW_CFA_offset_extended: + case DW_CFA_register: + case DW_CFA_def_cfa: + case DW_CFA_val_offset: + // Operands: ULEB128, ULEB128 + addInstruction(Opcode, Data.getULEB128(&Offset), + Data.getULEB128(&Offset)); + break; + case DW_CFA_offset_extended_sf: + case DW_CFA_def_cfa_sf: + case DW_CFA_val_offset_sf: + // Operands: ULEB128, SLEB128 + addInstruction(Opcode, Data.getULEB128(&Offset), + Data.getSLEB128(&Offset)); + break; + case DW_CFA_def_cfa_expression: + case DW_CFA_expression: + case DW_CFA_val_expression: + // TODO: implement this + report_fatal_error("Values with expressions not implemented yet!"); + } + } + } +} + + +void FrameEntry::dumpInstructions(raw_ostream &OS) const { + // TODO: at the moment only instruction names are dumped. Expand this to + // dump operands as well. 
+ for (std::vector::const_iterator I = Instructions.begin(), + E = Instructions.end(); + I != E; ++I) { + uint8_t Opcode = I->Opcode; + if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) + Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; + OS << " " << CallFrameString(Opcode) << ":\n"; + } +} + + namespace { /// \brief DWARF Common Information Entry (CIE) class CIE : public FrameEntry { @@ -69,9 +220,12 @@ public: << "\n"; OS << format(" Version: %d\n", Version); OS << " Augmentation: \"" << Augmentation << "\"\n"; - OS << format(" Code alignment factor: %u\n", (uint32_t)CodeAlignmentFactor); - OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor); - OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister); + OS << format(" Code alignment factor: %u\n", + (uint32_t)CodeAlignmentFactor); + OS << format(" Data alignment factor: %d\n", + (int32_t)DataAlignmentFactor); + OS << format(" Return address column: %d\n", + (int32_t)ReturnAddressRegister); OS << "\n"; } @@ -111,7 +265,6 @@ public: (int32_t)LinkedCIEOffset, (uint32_t)InitialLocation, (uint32_t)InitialLocation + (uint32_t)AddressRange); - OS << "\n"; if (LinkedCIE) { OS << format("%p\n", LinkedCIE); } @@ -208,7 +361,15 @@ void DWARFDebugFrame::parse(DataExtractor Data) { Entries.push_back(NewFDE); } - Offset = EndStructureOffset; + Entries.back()->parseInstructions(Offset, EndStructureOffset); + + if (Offset != EndStructureOffset) { + std::string Str; + raw_string_ostream OS(Str); + OS << format("Parsing entry instructions at %lx failed", + Entries.back()->getOffset()); + report_fatal_error(Str); + } } } @@ -217,7 +378,10 @@ void DWARFDebugFrame::dump(raw_ostream &OS) const { OS << "\n"; for (EntryVector::const_iterator I = Entries.begin(), E = Entries.end(); I != E; ++I) { - (*I)->dumpHeader(OS); + FrameEntry *Entry = *I; + Entry->dumpHeader(OS); + Entry->dumpInstructions(OS); + OS << "\n"; } } diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 615efb8..0f91c11 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -688,6 +688,7 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { /// encodings. const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { + case DW_CFA_nop: return "DW_CFA_nop"; case DW_CFA_advance_loc: return "DW_CFA_advance_loc"; case DW_CFA_offset: return "DW_CFA_offset"; case DW_CFA_restore: return "DW_CFA_restore"; -- cgit v1.1 From a931a12e04b856421977c86d94789cd8b47d6ad3 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 21 Feb 2013 22:58:42 +0000 Subject: Stop relying on physical register kill flags in isKilled() in the two-address pass. One of the callers of isKilled() can cope with overapproximation of kills and the other can't, so I added a flag to indicate this. In theory this could pessimize code slightly, but in practice most physical register uses are kills, and most important kills of physical registers are the only uses of that register prior to register allocation, so we can recognize them as kills even without kill flags. This is relevant because LiveIntervals gets rid of all kill flags. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e0dba3f..e312642 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -372,12 +372,19 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, /// normal heuristics commute the (two-address) add, which lets /// coalescing eliminate the extra copy. /// +/// If allowFalsePositives is true then likely kills are treated as kills even +/// if it can't be proven that they are kills. static bool isKilled(MachineInstr &MI, unsigned Reg, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - LiveIntervals *LIS) { + LiveIntervals *LIS, + bool allowFalsePositives) { MachineInstr *DefMI = &MI; for (;;) { + // All uses of physical registers are likely to be kills. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && + (allowFalsePositives || MRI->hasOneUse(Reg))) + return true; if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; if (TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -1028,7 +1035,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - bool regBKilled = isKilled(MI, regB, MRI, TII, LIS); + bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); if (TargetRegisterInfo::isVirtualRegister(regA)) scanUses(regA); @@ -1048,7 +1055,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (regCIdx != ~0U) { regC = MI.getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS)) + if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; -- cgit v1.1 From 143d46476cdcf5b88b9ee18ebd799e5820a2db0e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 22 Feb 2013 00:12:35 +0000 Subject: Implement the NoBuiltin attribute. The 'nobuiltin' attribute is applied to call sites to indicate that LLVM should not treat the callee function as a built-in function. I.e., it shouldn't try to replace that function with different code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175835 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/AsmParser/LLLexer.cpp                  |  1 +
 lib/AsmParser/LLParser.cpp                 | 27 +++++++++++++++++++------
 lib/AsmParser/LLParser.h                   |  2 +-
 lib/AsmParser/LLToken.h                    |  1 +
 lib/IR/Attributes.cpp                      | 32 +++++++++++++++++++++++++++++-
 lib/IR/Verifier.cpp                        |  3 ++-
 lib/Transforms/Scalar/SimplifyLibCalls.cpp |  2 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp  |  1 +
 8 files changed, 59 insertions(+), 10 deletions(-)

(limited to 'lib')

diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 35108af..2b14559 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -571,6 +571,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(naked);
   KEYWORD(nest);
   KEYWORD(noalias);
+  KEYWORD(nobuiltin);
   KEYWORD(nocapture);
   KEYWORD(noduplicate);
   KEYWORD(noimplicitfloat);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 86e2fd9..bde18cd 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -810,11 +810,13 @@ bool LLParser::ParseUnnamedAttrGrp() {
   assert(Lex.getKind() == lltok::AttrGrpID);
   unsigned VarID = Lex.getUIntVal();
   std::vector<unsigned> unused;
+  LocTy NoBuiltinLoc;
   Lex.Lex();
 
   if (ParseToken(lltok::equal, "expected '=' here") ||
       ParseToken(lltok::lbrace, "expected '{' here") ||
-      ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true) ||
+      ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true,
+                                 NoBuiltinLoc) ||
      ParseToken(lltok::rbrace, "expected end of attribute group"))
     return true;
@@ -828,13 +830,15 @@
 ///   ::= <attr> | <attr> '=' <value>
 bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
                                           std::vector<unsigned> &FwdRefAttrGrps,
-                                          bool inAttrGrp) {
+                                          bool inAttrGrp, LocTy &NoBuiltinLoc) {
   bool HaveError = false;
 
   B.clear();
 
   while (true) {
     lltok::Kind Token = Lex.getKind();
+    if (Token == lltok::kw_nobuiltin)
+      NoBuiltinLoc = Lex.getLoc();
     switch (Token) {
     default:
       if (!inAttrGrp) return HaveError;
@@ -908,6 +912,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
     case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
     case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
+    case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
     case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
     case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break;
     case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
@@ -1164,7 +1169,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_naked: case lltok::kw_nonlazybind:
     case lltok::kw_address_safety: case lltok::kw_minsize:
     case lltok::kw_alignstack: case lltok::kw_thread_safety:
-    case lltok::kw_uninitialized_checks:
+    case lltok::kw_nobuiltin: case lltok::kw_uninitialized_checks:
       HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
@@ -1207,6 +1212,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_minsize: case lltok::kw_alignstack: case lltok::kw_align:
     case lltok::kw_noduplicate: case lltok::kw_thread_safety:
     case lltok::kw_uninitialized_checks:
+    case lltok::kw_nobuiltin:
       HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
@@ -2944,6 +2950,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   bool isVarArg;
   AttrBuilder FuncAttrs;
   std::vector<unsigned> FwdRefAttrGrps;
+  LocTy NoBuiltinLoc;
std::string Section; unsigned Alignment; std::string GC; @@ -2953,7 +2960,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || - ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false) || + ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false, + NoBuiltinLoc) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || ParseOptionalAlignment(Alignment) || @@ -2961,6 +2969,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ParseStringConstant(GC))) return true; + if (FuncAttrs.contains(Attribute::NoBuiltin)) + return Error(NoBuiltinLoc, "'nobuiltin' attribute not valid on function"); + // If the alignment was parsed as an attribute, move to the alignment field. if (FuncAttrs.hasAlignmentAttr()) { Alignment = FuncAttrs.getAlignment(); @@ -3474,6 +3485,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); AttrBuilder RetAttrs, FnAttrs; std::vector FwdRefAttrGrps; + LocTy NoBuiltinLoc; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3486,7 +3498,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false) || + ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, + NoBuiltinLoc) || ParseToken(lltok::kw_to, "expected 'to' in invoke") || ParseTypeAndBasicBlock(NormalBB, PFS) || ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") || @@ -3881,6 +3894,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { AttrBuilder RetAttrs, FnAttrs; std::vector FwdRefAttrGrps; + LocTy NoBuiltinLoc; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3894,7 +3908,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false)) + ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, + NoBuiltinLoc)) return true; // If RetType is a non-function pointer type, then this is the short syntax diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 42cdbd5..1f2879e 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -242,7 +242,7 @@ namespace llvm { bool ParseUnnamedAttrGrp(); bool ParseFnAttributeValuePairs(AttrBuilder &B, std::vector &FwdRefAttrGrps, - bool inAttrGrp); + bool inAttrGrp, LocTy &NoBuiltinLoc); // Type Parsing. 
bool ParseType(Type *&Result, bool AllowVoid = false); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 97429b8..a51dada 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -102,6 +102,7 @@ namespace lltok { kw_naked, kw_nest, kw_noalias, + kw_nobuiltin, kw_nocapture, kw_noduplicate, kw_noimplicitfloat, diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index d89ebc5..96b17c3 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -171,6 +171,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "nest"; if (hasAttribute(Attribute::NoAlias)) return "noalias"; + if (hasAttribute(Attribute::NoBuiltin)) + return "nobuiltin"; if (hasAttribute(Attribute::NoCapture)) return "nocapture"; if (hasAttribute(Attribute::NoDuplicate)) @@ -354,6 +356,8 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const { uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { // FIXME: Remove this. switch (Val) { + default: + llvm_unreachable("Unsupported attribute type"); case Attribute::EndAttrKinds: llvm_unreachable("Synthetic enumerators which should never get here"); @@ -391,7 +395,6 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::ThreadSafety: return 1ULL << 36; case Attribute::UninitializedChecks: return 1ULL << 37; } - llvm_unreachable("Unsupported attribute type"); } //===----------------------------------------------------------------------===// @@ -1097,6 +1100,33 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { return Alignment == B.Alignment && StackAlignment == B.StackAlignment; } +void AttrBuilder::removeFunctionOnlyAttrs() { + removeAttribute(Attribute::NoReturn) + .removeAttribute(Attribute::NoUnwind) + .removeAttribute(Attribute::ReadNone) + .removeAttribute(Attribute::ReadOnly) + .removeAttribute(Attribute::NoInline) + .removeAttribute(Attribute::AlwaysInline) + .removeAttribute(Attribute::OptimizeForSize) + .removeAttribute(Attribute::StackProtect) + .removeAttribute(Attribute::StackProtectReq) + .removeAttribute(Attribute::StackProtectStrong) + .removeAttribute(Attribute::NoRedZone) + .removeAttribute(Attribute::NoImplicitFloat) + .removeAttribute(Attribute::Naked) + .removeAttribute(Attribute::InlineHint) + .removeAttribute(Attribute::StackAlignment) + .removeAttribute(Attribute::UWTable) + .removeAttribute(Attribute::NonLazyBind) + .removeAttribute(Attribute::ReturnsTwice) + .removeAttribute(Attribute::AddressSafety) + .removeAttribute(Attribute::ThreadSafety) + .removeAttribute(Attribute::UninitializedChecks) + .removeAttribute(Attribute::MinSize) + .removeAttribute(Attribute::NoDuplicate) + .removeAttribute(Attribute::NoBuiltin); +} + AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { // FIXME: Remove this in 4.0. 
if (!Val) return *this; diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 02c2096..33e8ec6 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -653,7 +653,8 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, !Attrs.hasAttribute(Idx, Attribute::AddressSafety) && !Attrs.hasAttribute(Idx, Attribute::ThreadSafety) && !Attrs.hasAttribute(Idx, Attribute::UninitializedChecks) && - !Attrs.hasAttribute(Idx, Attribute::MinSize), + !Attrs.hasAttribute(Idx, Attribute::MinSize) && + !Attrs.hasAttribute(Idx, Attribute::NoBuiltin), "Some attributes in '" + Attrs.getAsString(Idx) + "' only apply to functions!", V); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index d5cefa3..916b37d 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -165,7 +165,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { // Ignore non-calls. CallInst *CI = dyn_cast(I++); - if (!CI) continue; + if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue; // Ignore indirect calls and calls to non-external functions. Function *Callee = CI->getCalledFunction(); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index cccf0a6..8ad566c 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1889,6 +1889,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0; return Impl->optimizeCall(CI); } -- cgit v1.1 From d18e0b94bfaad5b8a24fcb45b55d7e031cc94202 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 22 Feb 2013 00:40:12 +0000 Subject: Add a bitmask for NoBuiltin. This should *not* be used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175843 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 96b17c3..84d14b4 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -394,7 +394,8 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::StackProtectStrong: return 1ULL << 35; case Attribute::ThreadSafety: return 1ULL << 36; case Attribute::UninitializedChecks: return 1ULL << 37; - } + case Attribute::NoBuiltin: return 1ULL << 38; + } } //===----------------------------------------------------------------------===// -- cgit v1.1 From bd2acfab4a4692ce39541f380997945e9bbb14ae Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 22 Feb 2013 00:50:09 +0000 Subject: Remove warning about default covering no cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175846 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Attributes.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 84d14b4..11ed82d 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -356,8 +356,6 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const { uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { // FIXME: Remove this. 
switch (Val) { - default: - llvm_unreachable("Unsupported attribute type"); case Attribute::EndAttrKinds: llvm_unreachable("Synthetic enumerators which should never get here"); @@ -395,7 +393,8 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::ThreadSafety: return 1ULL << 36; case Attribute::UninitializedChecks: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; - } + } + llvm_unreachable("Unsupported attribute type"); } //===----------------------------------------------------------------------===// -- cgit v1.1 From 46e0d1d58c9c1f288cbf943e4c930efd1a2968af Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 22 Feb 2013 00:50:48 +0000 Subject: Code cleanup: pass Offset by pointer to parseInstruction to more explicitly convey that it's an INOUT argument. Also, if parsing of entry instructions fails, don't push the entry. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175847 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugFrame.cpp | 62 +++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 951c7a0..3efe6a1 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -36,11 +36,10 @@ public: virtual uint64_t getOffset() const { return Offset; } /// \brief Parse and store a sequence of CFI instructions from our data - /// stream, starting at Offset and ending at EndOffset. If everything - /// goes well, Offset should be equal to EndOffset when this method + /// stream, starting at *Offset and ending at EndOffset. If everything + /// goes well, *Offset should be equal to EndOffset when this method /// returns. Otherwise, an error occurred. - /// TODO: Improve error reporting... - virtual void parseInstructions(uint32_t &Offset, uint32_t EndOffset); + virtual void parseInstructions(uint32_t *Offset, uint32_t EndOffset); /// \brief Dump the entry header to the given output stream. virtual void dumpHeader(raw_ostream &OS) const = 0; @@ -99,9 +98,9 @@ const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; -void FrameEntry::parseInstructions(uint32_t &Offset, uint32_t EndOffset) { - while (Offset < EndOffset) { - uint8_t Opcode = Data.getU8(&Offset); +void FrameEntry::parseInstructions(uint32_t *Offset, uint32_t EndOffset) { + while (*Offset < EndOffset) { + uint8_t Opcode = Data.getU8(Offset); // Some instructions have a primary opcode encoded in the top bits.
uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; @@ -116,7 +115,7 @@ void FrameEntry::parseInstructions(uint32_t &Offset, uint32_t EndOffset) { addInstruction(Primary, Op1); break; case DW_CFA_offset: - addInstruction(Primary, Op1, Data.getULEB128(&Offset)); + addInstruction(Primary, Op1, Data.getULEB128(Offset)); break; } } else { @@ -131,19 +130,19 @@ void FrameEntry::parseInstructions(uint32_t &Offset, uint32_t EndOffset) { break; case DW_CFA_set_loc: // Operands: Address - addInstruction(Opcode, Data.getAddress(&Offset)); + addInstruction(Opcode, Data.getAddress(Offset)); break; case DW_CFA_advance_loc1: // Operands: 1-byte delta - addInstruction(Opcode, Data.getU8(&Offset)); + addInstruction(Opcode, Data.getU8(Offset)); break; case DW_CFA_advance_loc2: // Operands: 2-byte delta - addInstruction(Opcode, Data.getU16(&Offset)); + addInstruction(Opcode, Data.getU16(Offset)); break; case DW_CFA_advance_loc4: // Operands: 4-byte delta - addInstruction(Opcode, Data.getU32(&Offset)); + addInstruction(Opcode, Data.getU32(Offset)); break; case DW_CFA_restore_extended: case DW_CFA_undefined: @@ -151,26 +150,26 @@ void FrameEntry::parseInstructions(uint32_t &Offset, uint32_t EndOffset) { case DW_CFA_def_cfa_register: case DW_CFA_def_cfa_offset: // Operands: ULEB128 - addInstruction(Opcode, Data.getULEB128(&Offset)); + addInstruction(Opcode, Data.getULEB128(Offset)); break; case DW_CFA_def_cfa_offset_sf: // Operands: SLEB128 - addInstruction(Opcode, Data.getSLEB128(&Offset)); + addInstruction(Opcode, Data.getSLEB128(Offset)); break; case DW_CFA_offset_extended: case DW_CFA_register: case DW_CFA_def_cfa: case DW_CFA_val_offset: // Operands: ULEB128, ULEB128 - addInstruction(Opcode, Data.getULEB128(&Offset), - Data.getULEB128(&Offset)); + addInstruction(Opcode, Data.getULEB128(Offset), + Data.getULEB128(Offset)); break; case DW_CFA_offset_extended_sf: case DW_CFA_def_cfa_sf: case DW_CFA_val_offset_sf: // Operands: ULEB128, SLEB128 - addInstruction(Opcode, Data.getULEB128(&Offset), - Data.getSLEB128(&Offset)); + addInstruction(Opcode, Data.getULEB128(Offset), + Data.getSLEB128(Offset)); break; case DW_CFA_def_cfa_expression: case DW_CFA_expression: @@ -337,37 +336,42 @@ void DWARFDebugFrame::parse(DataExtractor Data) { Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4); bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID); + FrameEntry *Entry = 0; if (IsCIE) { // Note: this is specifically DWARFv3 CIE header structure. It was - // changed in DWARFv4. + // changed in DWARFv4. We currently don't support reading DWARFv4 + // here because LLVM itself does not emit it (and LLDB doesn't + // support it either). 
uint8_t Version = Data.getU8(&Offset); const char *Augmentation = Data.getCStr(&Offset); uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset); int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); uint64_t ReturnAddressRegister = Data.getULEB128(&Offset); - CIE *NewCIE = new CIE(Data, StartOffset, Length, Version, - StringRef(Augmentation), CodeAlignmentFactor, - DataAlignmentFactor, ReturnAddressRegister); - Entries.push_back(NewCIE); + Entry = new CIE(Data, StartOffset, Length, Version, + StringRef(Augmentation), CodeAlignmentFactor, + DataAlignmentFactor, ReturnAddressRegister); } else { // FDE uint64_t CIEPointer = Id; uint64_t InitialLocation = Data.getAddress(&Offset); uint64_t AddressRange = Data.getAddress(&Offset); - FDE *NewFDE = new FDE(Data, StartOffset, Length, CIEPointer, - InitialLocation, AddressRange); - Entries.push_back(NewFDE); + Entry = new FDE(Data, StartOffset, Length, CIEPointer, + InitialLocation, AddressRange); } - Entries.back()->parseInstructions(Offset, EndStructureOffset); + assert(Entry && "Expected Entry to be populated with CIE or FDE"); + Entry->parseInstructions(&Offset, EndStructureOffset); - if (Offset != EndStructureOffset) { + if (Offset == EndStructureOffset) { + // Entry instructions parsed successfully. + Entries.push_back(Entry); + } else { std::string Str; raw_string_ostream OS(Str); OS << format("Parsing entry instructions at %lx failed", - Entries.back()->getOffset()); + Entry->getOffset()); report_fatal_error(Str); } } -- cgit v1.1 From 667754e239538350c5bd8581772f414783ac71a2 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 22 Feb 2013 01:15:08 +0000 Subject: Remove code copied from GenRegisterInfo.inc. There's no apparent reason this code was copied from generated source into a .cpp. It sets a bad example for those working on other targets and trying to understand the register info API. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 52 ------------------------------ lib/Target/Hexagon/HexagonRegisterInfo.h | 5 --- 2 files changed, 57 deletions(-) (limited to 'lib') diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index e558234..f947dfc 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -291,58 +291,6 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector Moves.push_back(MachineMove(0, Dst, Src)); } -// Get the weight in units of pressure for this register class. -const RegClassWeight & -HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const { - // Each TargetRegisterClass has a per register weight, and weight - // limit which must be less than the limits of its pressure sets. - static const RegClassWeight RCWeightTable[] = { - {1, 32}, // IntRegs - {1, 8}, // CRRegs - {1, 4}, // PredRegs - {2, 16}, // DoubleRegs - {0, 0} }; - return RCWeightTable[RC->getID()]; -} - -/// Get the number of dimensions of register pressure. -unsigned HexagonRegisterInfo::getNumRegPressureSets() const { - return 4; -} - -/// Get the name of this register unit pressure set.
-const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const { - static const char *const RegPressureSetName[] = { - "IntRegsRegSet", - "CRRegsRegSet", - "PredRegsRegSet", - "DoubleRegsRegSet" - }; - assert((Idx < 4) && "Index out of bounds"); - return RegPressureSetName[Idx]; -} - -/// Get the register unit pressure limit for this dimension. -/// This limit must be adjusted dynamically for reserved registers. -unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { - static const int RegPressureLimit [] = { 16, 4, 2, 8 }; - assert((Idx < 4) && "Index out of bounds"); - return RegPressureLimit[Idx]; -} - -const int* -HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC) - const { - static const int RCSetsTable[] = { - 0, -1, // IntRegs - 1, -1, // CRRegs - 2, -1, // PredRegs - 0, -1, // DoubleRegs - -1 }; - static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 }; - unsigned SetListStart = RCSetStartTable[RC->getID()]; - return &RCSetsTable[SetListStart]; -} unsigned HexagonRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index a1a438a..8a3f94a 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -84,11 +84,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; - unsigned getNumRegPressureSets() const; - const char *getRegPressureSetName(unsigned Idx) const; - unsigned getRegPressureSetLimit(unsigned Idx) const; - const int* getRegClassPressureSets(const TargetRegisterClass *RC) const; }; } // end namespace llvm -- cgit v1.1 From b704ffbb46cae716dc51bd8e6ba425c0a4949a47 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Fri, 22 Feb 2013 01:50:38 +0000 Subject: Fix isa<> check which could never be true. It was incorrectly checking a Function* being an IntrinsicInst* which isn't possible. It should always have been checking the CallInst* instead. Added test case for x86 which ensures we only get one constant load. It was 2 before this change. rdar://problem/13267920 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/FastISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 6c41e1b..04f5b32 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -705,7 +705,7 @@ bool FastISel::SelectCall(const User *I) { // all the values which have already been materialized, // appear after the call. It also makes sense to skip intrinsics // since they tend to be inlined. - if (!isa(F)) + if (!isa(Call)) flushLocalValueMap(); // An arbitrary call. Bail. -- cgit v1.1 From 7617d032ae12ba96ad65f37d91274e6f8c14e690 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 22 Feb 2013 05:10:51 +0000 Subject: Expand mips16 SelT form pseudos/macros.
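For orientation, a rough C++ model of what one of these compare-and-select pseudos computes (illustrative only, the helper name is hypothetical; the actual expansion below materializes this as a branch diamond feeding a PHI, not as a C++ conditional):

#include <cstdint>

// Models e.g. SelTBteqZSlti: slti writes the implicit MIPS16 T register,
// and bteqz then selects the true value when T is zero.
int32_t selTBteqZSlti(int32_t Rx, int32_t Imm,
                      int32_t TrueVal, int32_t FalseVal) {
  bool T = Rx < Imm;               // slti: T = (Rx < Imm), signed compare
  return !T ? TrueVal : FalseVal;  // bteqz taken (T == 0) yields TrueVal
}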
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175862 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 1 + lib/Target/Mips/MipsISelLowering.cpp | 77 ++++++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsISelLowering.h | 3 ++ 3 files changed, 81 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 9e07b95..3c6c7d7 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -392,6 +392,7 @@ class SeliT: !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; + let usesCustomInserter = 1; } // diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5605759..f1affff 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1298,6 +1298,71 @@ MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI, return BB; } +MachineBasicBlock *MipsTargetLowering::EmitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; + +} + MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1413,6 +1478,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitSel16(Mips::BeqzRxImm16, MI, BB); case Mips::SelBneZ: return EmitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return EmitSelT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return EmitSelT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return EmitSelT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return EmitSelT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return EmitSelT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return EmitSelT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2531a20..8343915 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -406,6 +406,9 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; }; } -- cgit v1.1 From 00ddc5a7274fb4131f1a724bc350fd756156a80f Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 22 Feb 2013 05:59:39 +0000 Subject: Fix a nomenclature mistake. Slt->Slti in the functions. The "i" refers to the immediate operand of the slt or cmp function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175865 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 14 +++++++------- lib/Target/Mips/MipsISelLowering.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index f1affff..b9a0bfb 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1298,7 +1298,7 @@ MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI, return BB; } -MachineBasicBlock *MipsTargetLowering::EmitSelT16 +MachineBasicBlock *MipsTargetLowering::EmitSeliT16 (unsigned Opc1, unsigned Opc2, MachineInstr *MI, MachineBasicBlock *BB) const { if (DontExpandCondPseudos16) @@ -1479,17 +1479,17 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::SelBneZ: return EmitSel16(Mips::BnezRxImm16, MI, BB); case Mips::SelTBteqZCmpi: - return EmitSelT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + return EmitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); case Mips::SelTBteqZSlti: - return EmitSelT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + return EmitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); case Mips::SelTBteqZSltiu: - return EmitSelT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + return EmitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); case Mips::SelTBtneZCmpi: - return EmitSelT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + return EmitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); case Mips::SelTBtneZSlti: - return EmitSelT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + return EmitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); case Mips::SelTBtneZSltiu: - return EmitSelT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + return EmitSeliT16(Mips::BtnezX16,
Mips::SltiuRxImmX16, MI, BB); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 8343915..789456a 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -406,7 +406,7 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2, + MachineBasicBlock *EmitSeliT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI, MachineBasicBlock *BB) const; -- cgit v1.1 From 351b7a10e2560a835759748c58da09e53207b39d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 22 Feb 2013 09:09:42 +0000 Subject: Use references to attribute groups on the call/invoke instructions. Listing all of the attributes for the callee of a call/invoke instruction is way too much and makes the IR unreadable. Use references to attributes instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175877 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 3f32ac1..9954a29 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -554,13 +554,15 @@ void SlotTracker::processFunction() { if (MDNode *N = dyn_cast_or_null(I->getOperand(i))) CreateMetadataSlot(N); - // Add all the call attributes to the table. This is important for - // inline ASM, which may have attributes but no declaration. - if (CI->isInlineAsm()) { - AttributeSet Attrs = CI->getAttributes().getFnAttributes(); - if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) - CreateAttributeSetSlot(Attrs); - } + // Add all the call attributes to the table. + AttributeSet Attrs = CI->getAttributes().getFnAttributes(); + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) + CreateAttributeSetSlot(Attrs); + } else if (const InvokeInst *II = dyn_cast(I)) { + // Add all the call attributes to the table. + AttributeSet Attrs = II->getAttributes().getFnAttributes(); + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) + CreateAttributeSetSlot(Attrs); } // Process metadata attached with this instruction. @@ -1935,7 +1937,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } Out << ')'; if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - Out << ' ' << PAL.getAsString(AttributeSet::FunctionIndex); + Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes()); } else if (const InvokeInst *II = dyn_cast(&I)) { Operand = II->getCalledValue(); PointerType *PTy = cast(Operand->getType()); @@ -1975,7 +1977,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << ')'; if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - Out << ' ' << PAL.getAsString(AttributeSet::FunctionIndex); + Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes()); Out << "\n to "; writeOperand(II->getNormalDest(), true); -- cgit v1.1 From 29e05fe7a885bd03d8570d2bcf14193013776bcd Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Fri, 22 Feb 2013 10:01:33 +0000 Subject: Make ARMAsmPrinter generate the correct alignment specifier syntax in instructions. 
The Printer will now print instructions with the correct alignment specifier syntax, like vld1.8 {d16}, [r0:64] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175884 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index d48b37e..2afb20d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -627,8 +627,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, O << markup(""); } -- cgit v1.1 From 86f7e67db3d226be6529acb54c24d1058823beb9 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Fri, 22 Feb 2013 11:22:49 +0000 Subject: R600/SI: Add pattern for fceil. 9 more little piglits with radeonsi. NOTE: This is a candidate for the Mesa stable branch. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175885 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 55b4fa8..b83276f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -567,7 +567,9 @@ defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))] >; defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; -defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; +defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", + [(set VReg_32:$dst, (fceil VSrc_32:$src0))] +>; defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", [(set VReg_32:$dst, (frint VSrc_32:$src0))] >; -- cgit v1.1 From dd24703f95676c148171f2c0b8431b7c1baf6601 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Fri, 22 Feb 2013 11:22:54 +0000 Subject: R600/SI: Add pattern for logical or of i1 values. 24 more little piglits with radeonsi. NOTE: This is a candidate for the Mesa stable branch. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175886 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b83276f..2e43f9e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -942,6 +942,10 @@ def : Pat < def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; +def : Pat < + (i1 (or SSrc_64:$src0, SSrc_64:$src1)), + (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1) +>; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; -- cgit v1.1 From 890dc926808587a193d705b89163b5a0c64444d9 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Fri, 22 Feb 2013 11:22:58 +0000 Subject: R600/SI: Add pattern for sign extension of i1 to i32. 16 more little piglits with radeonsi. NOTE: This is a candidate for the Mesa stable branch. 
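As an aside, the pattern below is correct because sign-extending an i1 maps a set bit to all ones; a minimal scalar model in C++ (illustrative, the name is hypothetical):

#include <cstdint>

// sext i1 -> i32: true becomes -1 (all bits set), false becomes 0, which
// is what V_CNDMASK_B32_e64 selects between with constants 0 and -1.
int32_t sextI1ToI32(bool Src) {
  return Src ? -1 : 0;
}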
Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175887 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 2e43f9e..907cf49 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1382,6 +1382,11 @@ def : Pat < 0, 0, 0, 0), sub3) >; +def : Pat < + (i32 (sext (i1 SReg_64:$src0))), + (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ -- cgit v1.1 From c4952bfc31ee437590eeba8f16800fda5e4d607e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 22 Feb 2013 19:19:44 +0000 Subject: x86_64: designate most general purpose and SSE registers as callee save under coldcc git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175911 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CallingConv.td | 3 +++ lib/Target/X86/X86RegisterInfo.cpp | 46 +++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7ad2fdd..b516be0 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -519,6 +519,9 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; +def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, + R11, R12, R13, R14, R15, RBP, + (sequence "XMM%u", 0, 15))>; // Standard C + YMM6-15 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 03f412f..16886e4 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -235,38 +235,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const uint16_t * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool callsEHReturn = false; - bool ghcCall = false; - bool oclBiCall = false; - bool hipeCall = false; - bool HasAVX = TM.getSubtarget().hasAVX(); - - if (MF) { - callsEHReturn = MF->getMMI().callsEHReturn(); - const Function *F = MF->getFunction(); - ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); - oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false); - hipeCall = (F ? 
F->getCallingConv() == CallingConv::HiPE : false); - } - - if (ghcCall || hipeCall) + switch (MF->getFunction()->getCallingConv()) { + case CallingConv::GHC: + case CallingConv::HiPE: return CSR_NoRegs_SaveList; - if (oclBiCall) { + + case CallingConv::Intel_OCL_BI: { + bool HasAVX = TM.getSubtarget().hasAVX(); if (HasAVX && IsWin64) - return CSR_Win64_Intel_OCL_BI_AVX_SaveList; + return CSR_Win64_Intel_OCL_BI_AVX_SaveList; if (HasAVX && Is64Bit) - return CSR_64_Intel_OCL_BI_AVX_SaveList; + return CSR_64_Intel_OCL_BI_AVX_SaveList; if (!HasAVX && !IsWin64 && Is64Bit) - return CSR_64_Intel_OCL_BI_SaveList; + return CSR_64_Intel_OCL_BI_SaveList; + break; } + + case CallingConv::Cold: + if (Is64Bit) + return CSR_MostRegs_64_SaveList; + break; + + default: + break; + } + + bool CallsEHReturn = MF->getMMI().callsEHReturn(); if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; - if (callsEHReturn) + if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; } - if (callsEHReturn) + if (CallsEHReturn) return CSR_32EHRet_SaveList; return CSR_32_SaveList; } @@ -287,6 +289,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return CSR_NoRegs_RegMask; if (!Is64Bit) return CSR_32_RegMask; + if (CC == CallingConv::Cold) + return CSR_MostRegs_64_RegMask; if (IsWin64) return CSR_Win64_RegMask; return CSR_64_RegMask; -- cgit v1.1 From 606893294095e214f50937e8f8e9770efaab07a7 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 22 Feb 2013 21:10:03 +0000 Subject: [mips] Emit call16 operator instead of got_disp. The former allows lazy binding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b9a0bfb..034a672 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3298,9 +3298,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else if (LargeGOT) Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16); - else if (HasMips64) - Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_DISP); - else // O32 & PIC + else // N64 || PIC Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); GlobalOrExternal = true; -- cgit v1.1 From d99a5a3ab4d47c6532bcf17a01677b1730599057 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 22 Feb 2013 23:33:30 +0000 Subject: SelectionDAG compile time improvement. One of the phases of SelectionDAG is LegalizeVectors. We don't need to sort the DAG and copy nodes around if there are no vector ops. Speeds up the compilation time of SelectionDAG on a big scalar workload by ~8%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175929 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 7b28e69..c6e066e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -85,6 +85,25 @@ class VectorLegalizer { }; bool VectorLegalizer::Run() { + // Before we start legalizing vector nodes, check if there are any vectors. 
+ bool HasVectors = false; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + // Check if the values of the nodes contain vectors. We don't need to check + // the operands because we are going to check their values at some point. + for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); + J != E; ++J) + HasVectors |= J->isVector(); + + // If we found a vector node we can start the legalization. + if (HasVectors) + break; + } + + // If this basic block has no vectors then no need to legalize vectors. + if (!HasVectors) + return false; + // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we // could just start legalizing on the root and traverse the whole graph. In -- cgit v1.1 From 1683b304e0487700c2f4447e7c72c62fb9f3a24c Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 22 Feb 2013 23:50:01 +0000 Subject: Add a TODO and explain when we can get rid of the isMain field. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175932 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 16632a1..3b69119 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -98,7 +98,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, MDString::get(VMContext, Filename), MDString::get(VMContext, Directory), MDString::get(VMContext, Producer), - // Deprecate isMain field. + // isMain field can be removed when we remove the legacy debug info. ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), -- cgit v1.1 From e4b67906d3cc5ea365359f9b034189aaa51ca1e9 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 22 Feb 2013 23:50:04 +0000 Subject: Add a field to the compile unit of where we plan on splitting out the debug info for -gsplit-dwarf so we can encode that location in the skeleton cu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175933 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 3b69119..bab30e4 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -76,7 +76,7 @@ static MDNode *getNonCompileUnitScope(MDNode *N) { void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, bool isOptimized, StringRef Flags, - unsigned RunTimeVer) { + unsigned RunTimeVer, StringRef SplitName) { assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && "Invalid Language tag"); @@ -106,7 +106,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempEnumTypes, TempRetainTypes, TempSubprograms, - TempGVs + TempGVs, + MDString::get(VMContext, SplitName) }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); -- cgit v1.1 From 3ce51a970fd545c1ef45f580c84b64d764fb9a64 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 22 Feb 2013 23:50:08 +0000 Subject: Use getSplitDebugFilename when constructing the skeleton cu and update testcase accordingly to give the correct name to the cu. 
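The old code derived the name from the source file; a simplified sketch of the derivation being removed (plain std::string standing in for the llvm::sys::path helpers, so a rough model rather than the exact code):

#include <string>

// Old behavior: "dir/foo.cpp" becomes "foo.dwo". The new code instead
// emits whatever split-debug name the frontend recorded in the CU.
std::string oldDwoName(const std::string &Source) {
  std::string::size_type Slash = Source.find_last_of("/\\");
  std::string FN =
      Slash == std::string::npos ? Source : Source.substr(Slash + 1);
  std::string::size_type Dot = FN.find_last_of('.');
  if (Dot != std::string::npos)
    FN.resize(Dot);
  return FN + ".dwo";
}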
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175934 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 0982dbb..87659ef 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2543,10 +2543,8 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { DIUnit.getLanguage(), Die, Asm, this, &SkeletonHolder); - SmallString<16> T(DIUnit.getFilename()); - sys::path::replace_extension(T, ".dwo"); - StringRef FN = sys::path::filename(T); - NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, FN); + NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, + DIUnit.getSplitDebugFilename()); // This should be a unique identifier when we want to build .dwp files. NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); -- cgit v1.1 From 3603e9aa5e46923c44db1e6254f0393a2b0644e4 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sat, 23 Feb 2013 00:31:32 +0000 Subject: Fixed a careless mistake. rdar://13273675. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175939 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 7f66b1c..9c14949 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -218,7 +218,7 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { if (isa(UUser) || isa(UUser) || isa(UUser) || isa(UUser)) { - if (!VisitedSet.insert(UUser)) { + if (VisitedSet.insert(UUser)) { DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. " "Ptr escapes if result escapes. Adding to list.\n"); Worklist.push_back(UUser); -- cgit v1.1 From 1e8ed2537b3e4b2175cd9e62626f07606c62cfa0 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Sat, 23 Feb 2013 00:52:09 +0000 Subject: ARM: Convenience aliases for 'srs*' instructions. Handle an implied 'sp' operand. 
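With these aliases, 'srsdb #5' is accepted for 'srsdb sp, #5' and 'srsdb #5!' for 'srsdb sp!, #5'. The parser change splits a trailing '!' off the immediate into its own token; a standalone sketch of that step (hypothetical helper, not the actual AsmParser API):

#include <string>
#include <utility>

// Split "#5!" into the immediate text and a writeback flag, mirroring how
// the trailing '!' becomes a separate token for the alias matcher.
std::pair<std::string, bool> splitTrailingExclaim(std::string Op) {
  bool Writeback = !Op.empty() && Op.back() == '!';
  if (Writeback)
    Op.pop_back();
  return std::make_pair(Op, Writeback);
}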
rdar://11466783 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175940 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 12 ++++++++++++ lib/Target/ARM/ARMInstrThumb2.td | 7 +++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 9 +++++++++ 3 files changed, 28 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c938c41..9409f35 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -2103,6 +2103,18 @@ def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { let Inst{24-23} = 0b11; } +def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>; +def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>; +def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>; +def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>; +def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>; + // Return From Exception class RFEI : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index cf8b302..c9d709e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3481,6 +3481,13 @@ def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary, def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary, "srsia","\tsp, $mode", []>; + +def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>; +def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>; + +def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>; +def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; + // Return From Exception is a system instruction. class T2RFE op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c95cc1b..6c678fd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4625,6 +4625,15 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, } E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); + + // There can be a trailing '!' on operands that we want as a separate + // '!' Token operand. Handle that here. For example, the compatibility + // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(), + Parser.getTok().getLoc())); + Parser.Lex(); // Eat exclaim token + } return false; } // w/ a ':' after the '#', it's just like a plain ':'. -- cgit v1.1 From 50354a3f4a5c9e3689d502a935430f2a57a44af2 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 23 Feb 2013 03:09:56 +0000 Subject: Expand pseudos/macros for Selt. This is the last of the complex macros. The rest is some small misc. stuff.
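These are the register-register forms; e.g. SelTBteqZCmp pairs cmp (which writes the implicit T register) with bteqz. A rough C++ model (illustrative only, the helper name is hypothetical):

#include <cstdint>

// Models SelTBteqZCmp: cmp sets T = Rx ^ Ry, so T == 0 means the operands
// were equal, and bteqz then selects the true value.
int32_t selTBteqZCmp(int32_t Rx, int32_t Ry,
                     int32_t TrueVal, int32_t FalseVal) {
  int32_t T = Rx ^ Ry;                  // cmp: T register gets Rx XOR Ry
  return T == 0 ? TrueVal : FalseVal;   // bteqz taken (T == 0) yields TrueVal
}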
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 1 + lib/Target/Mips/MipsISelLowering.cpp | 78 ++++++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsISelLowering.h | 3 ++ 3 files changed, 82 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 3c6c7d7..2cb1573 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -416,6 +416,7 @@ class SelT: !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; + let usesCustomInserter = 1; } // diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 034a672..68fa28b 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1298,6 +1298,72 @@ MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI, return BB; } +MachineBasicBlock *MipsTargetLowering::EmitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; + +} + + MachineBasicBlock *MipsTargetLowering::EmitSeliT16 (unsigned Opc1, unsigned Opc2, MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1490,6 +1556,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); case Mips::SelTBtneZSltiu: return EmitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return EmitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return EmitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return EmitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return EmitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return EmitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return EmitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 789456a..2896de5 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -410,6 +410,9 @@ namespace llvm { MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; }; } -- cgit v1.1 From 80885e524ffceaba5ed237338a10f807895e9f8e Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sat, 23 Feb 2013 04:49:13 +0000 Subject: Make rescheduleMIBelowKill() and rescheduleKillAboveMI() LiveIntervals-aware in TwoAddressInstructionPass. The code in rescheduleMIBelowKill() is a bit tricky, since multiple instructions need to be moved down, one-at-a-time, in reverse order. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175955 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 115 ++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e312642..40c1a1b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -734,9 +734,9 @@ bool TwoAddressInstructionPass:: rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { - // Bail immediately if we don't have LV available. We use it to find kills - // efficiently. - if (!LV) + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. + if (!LV && !LIS) return false; MachineInstr *MI = &*mi; @@ -745,7 +745,22 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + MachineInstr *KillMI = 0; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. 
return false; @@ -781,24 +796,27 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, Defs.insert(MOReg); else { Uses.insert(MOReg); - if (MO.isKill() && MOReg != Reg) + if (MOReg != Reg && (MO.isKill() || + (LIS && isPlainlyKilled(MI, MOReg, LIS)))) Kills.insert(MOReg); } } // Move the copies connected to MI down as well. - MachineBasicBlock::iterator From = MI; - MachineBasicBlock::iterator To = llvm::next(From); - while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) { - Defs.insert(To->getOperand(0).getReg()); - ++To; + MachineBasicBlock::iterator Begin = MI; + MachineBasicBlock::iterator AfterMI = llvm::next(Begin); + + MachineBasicBlock::iterator End = AfterMI; + while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { + Defs.insert(End->getOperand(0).getReg()); + ++End; } // Check if the reschedule will not break dependencies. unsigned NumVisited = 0; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; - for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) { + for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) { MachineInstr *OtherMI = I; // DBG_VALUE cannot be counted against the limit. if (OtherMI->isDebugValue()) @@ -829,11 +847,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } else { if (Defs.count(MOReg)) return false; + bool isKill = MO.isKill() || + (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)); if (MOReg != Reg && - ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) + ((isKill && Uses.count(MOReg)) || Kills.count(MOReg))) // Don't want to extend other live ranges and update kills. return false; - if (MOReg == Reg && !MO.isKill()) + if (MOReg == Reg && !isKill) // We can't schedule across a use of the register in question. return false; // Ensure that if this is the register in question, it's the kill we expect. @@ -844,19 +864,35 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } // Move debug info as well. - while (From != MBB->begin() && llvm::prior(From)->isDebugValue()) - --From; + while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue()) + --Begin; + + nmi = End; + MachineBasicBlock::iterator InsertPos = KillPos; + if (LIS) { + // We have to move the copies first so that the MBB is still well-formed + // when calling handleMove(). + for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) { + MachineInstr *CopyMI = MBBI; + ++MBBI; + MBB->splice(InsertPos, MBB, CopyMI); + LIS->handleMove(CopyMI); + InsertPos = CopyMI; + } + End = llvm::next(MachineBasicBlock::iterator(MI)); + } // Copies following MI may have been moved as well. - nmi = To; - MBB->splice(KillPos, MBB, From, To); + MBB->splice(InsertPos, MBB, Begin, End); DistanceMap.erase(DI); // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - if (LIS) + if (LIS) { LIS->handleMove(MI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; @@ -892,9 +928,9 @@ bool TwoAddressInstructionPass:: rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { - // Bail immediately if we don't have LV available. We use it to find kills - // efficiently. + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently.
+ if (!LV && !LIS) return false; MachineInstr *MI = &*mi; @@ -903,7 +939,22 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + MachineInstr *KillMI = 0; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -930,10 +981,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, continue; if (isDefTooClose(MOReg, DI->second, MI)) return false; - if (MOReg == Reg && !MO.isKill()) + bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS)); + if (MOReg == Reg && !isKill) return false; Uses.insert(MOReg); - if (MO.isKill() && MOReg != Reg) + if (isKill && MOReg != Reg) Kills.insert(MOReg); } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { Defs.insert(MOReg); @@ -973,7 +1025,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, if (Kills.count(MOReg)) // Don't want to extend other live ranges and update kills. return false; - if (OtherMI != MI && MOReg == Reg && !MO.isKill()) + if (OtherMI != MI && MOReg == Reg && + !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)))) // We can't schedule across a use of the register in question. return false; } else { @@ -1007,10 +1060,12 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, DistanceMap.erase(DI); // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - if (LIS) + if (LIS) { LIS->handleMove(KillMI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; -- cgit v1.1 From 4c57942608094a74543920b7c809e442fa90dd72 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sat, 23 Feb 2013 04:49:20 +0000 Subject: Make TwoAddressInstructionPass::sink3AddrInstruction() LiveIntervals-aware. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 49 +++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 40c1a1b..cbe07db 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -163,6 +163,8 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); + /// sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it /// past the instruction that would kill the above mentioned register to reduce @@ -204,14 +206,29 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // Find the instruction that kills SavedReg. 
MachineInstr *KillMI = NULL; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SavedReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isKill()) - continue; - KillMI = UseMO.getParent(); - break; + if (LIS) { + LiveInterval &LI = LIS->getInterval(SavedReg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } + if (!KillMI) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SavedReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + if (!UseMO.isKill()) + continue; + KillMI = UseMO.getParent(); + break; + } } // If we find the instruction that kills SavedReg, and it is in an @@ -250,7 +267,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, if (DefReg == MOReg) return false; - if (MO.isKill()) { + if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) { if (OtherMI == KillMI && MOReg == SavedReg) // Save the operand that kills the register. We want to unset the kill // marker if we can sink MI past it. @@ -263,13 +280,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, } assert(KillMO && "Didn't find kill"); - // Update kill and LV information. - KillMO->setIsKill(false); - KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); - KillMO->setIsKill(true); + if (!LIS) { + // Update kill and LV information. + KillMO->setIsKill(false); + KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); + KillMO->setIsKill(true); - if (LV) - LV->replaceKillInstruction(SavedReg, KillMI, MI); + if (LV) + LV->replaceKillInstruction(SavedReg, KillMI, MI); + } // Move instruction to its destination. MBB->remove(MI); -- cgit v1.1 From b4bd022731b28a80f59818870cc7df5d4771d793 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sat, 23 Feb 2013 04:49:22 +0000 Subject: Fix a bug with the LiveIntervals updating in the two-address pass found by running ASCI_Purple/SMG2000 in the test-suite. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175957 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index cbe07db..aca85b2 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -370,7 +370,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, SlotIndex useIdx = LIS->getInstructionIndex(MI); LiveInterval::const_iterator I = LI.find(useIdx); assert(I != LI.end() && "Reg must be live-in to use."); - return SlotIndex::isSameInstr(I->end, useIdx); + return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx); } return MI->killsRegister(Reg); -- cgit v1.1 From 79f5ab1931e5abf6e9c304db25e50d592dd8f5b8 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sat, 23 Feb 2013 10:25:25 +0000 Subject: Make some fixes for LiveInterval repair with debug info. Debug value MachineInstrs don't have a slot index. 
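Stated as code, the invariant being enforced is the following (an illustrative sketch, reusing the iteration style and the LIS/Begin/End names of the loops changed below; not an addition to the patch):

    for (MachineBasicBlock::iterator I = End; I != Begin;) {
      --I;
      MachineInstr *MI = I;
      if (MI->isDebugValue())
        continue; // DBG_VALUEs are never assigned a SlotIndex
      // Only a real instruction may be mapped to an index.
      SlotIndex Idx = LIS->getInstructionIndex(MI);
      (void)Idx;
    }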
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175961 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 6 +++++- lib/CodeGen/SlotIndexes.cpp | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a716e8b..22b35d5 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1056,6 +1056,8 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (MOI->isReg() && @@ -1087,8 +1089,10 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr *MI = I; - SlotIndex instrIdx = getInstructionIndex(MI); + if (MI->isDebugValue()) + continue; + SlotIndex instrIdx = getInstructionIndex(MI); bool isStartValid = getInstructionFromIndex(LII->start); bool isEndValid = getInstructionFromIndex(LII->end); diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index f293794..20049a8 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -208,7 +208,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; MachineInstr *MI = I; - if (mi2iMap.find(MI) == mi2iMap.end()) + if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end()) insertMachineInstrInMaps(MI); } } -- cgit v1.1 From 36bd16ef0bd808fb4191bfb6e1b4609aef0dc6bd Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 23 Feb 2013 10:40:58 +0000 Subject: X86: Disable cmov-memory patterns on subtargets without cmov. Fixes PR15115. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175962 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrCompiler.td | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index f387962..734e598 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1081,12 +1081,14 @@ def : Pat<(X86cmp GR64:$src1, 0), // inverted. multiclass CMOVmr { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; + let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), + (Inst16 GR16:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), + (Inst32 GR32:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), + (Inst64 GR64:$src2, addr:$src1)>; + } } defm : CMOVmr; -- cgit v1.1 From 1ea93c79bc8b044935f867b334668623c587f5a8 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sat, 23 Feb 2013 23:13:28 +0000 Subject: TargetInstrInfo::commuteInstruction() doesn't actually return a new instruction unless it was requested to with an optional parameter that defaults to false, so we don't need to handle that case in TwoAddressInstructionPass. 
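For reference, the contract being relied on looks like this (a sketch; the signature is abbreviated from the TargetInstrInfo of this period):

    // virtual MachineInstr *
    // TargetInstrInfo::commuteInstruction(MachineInstr *MI,
    //                                     bool NewMI = false) const;
    MachineInstr *Res = TII->commuteInstruction(MI); // NewMI defaulted to false
    assert(Res == MI && "in-place commute must return the same instruction");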
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175974 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index aca85b2..2ed6be4 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -581,19 +581,9 @@ commuteInstruction(MachineBasicBlock::iterator &mi, } DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); - // If the instruction changed to commute it, update livevar. - if (NewMI != MI) { - if (LV) - // Update live variables - LV->replaceKillInstruction(RegC, MI, NewMI); - if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, NewMI); - - MBB->insert(mi, NewMI); // Insert the new inst - MBB->erase(mi); // Nuke the old inst. - mi = NewMI; - DistanceMap.insert(std::make_pair(NewMI, Dist)); - } + assert(NewMI == MI && + "TargetInstrInfo::commuteInstruction() should not return a new " + "instruction unless it was requested."); // Update source register map. unsigned FromRegC = getMappedReg(RegC, SrcRegMap); -- cgit v1.1 From e7c67492ddc503802128ff20853a23a644f082f9 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 23 Feb 2013 23:37:03 +0000 Subject: Add new base instruction def for cmpi, cmp, slt and sltu so that def/uses are proper. Fixed this already a few days ago for slti. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175975 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 2cb1573..1a50faf 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -283,6 +283,11 @@ class FRR16_ins f, string asmstr, InstrItinClass itin> : !strconcat(asmstr, "\t$rx, $ry"), [], itin> { } +class FRR16R_ins f, string asmstr, InstrItinClass itin> : + FRR16 { +} + class FRRTR16_ins : MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ; @@ -610,7 +615,7 @@ def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">, // Purpose: Compare // To compare the contents of two GPRs. // -def CmpRxRy16: FRR16_ins<0b01010, "cmp", IIAlu> { +def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> { let Defs = [T8]; } @@ -619,7 +624,7 @@ def CmpRxRy16: FRR16_ins<0b01010, "cmp", IIAlu> { // Purpose: Compare Immediate // To compare a constant with the contents of a GPR. // -def CmpiRxImm16: FRI16_ins<0b01110, "cmpi", IIAlu> { +def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> { let Defs = [T8]; } @@ -628,7 +633,7 @@ def CmpiRxImm16: FRI16_ins<0b01110, "cmpi", IIAlu> { // Purpose: Compare Immediate (Extended) // To compare a constant with the contents of a GPR. // -def CmpiRxImmX16: FEXT_RI16_ins<0b01110, "cmpi", IIAlu> { +def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { let Defs = [T8]; } @@ -1110,7 +1115,7 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">; // Purpose: Set on Less Than // To record the result of a less-than comparison. // -def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>{ +def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{ let Defs = [T8]; } @@ -1120,7 +1125,7 @@ def SltCCRxRy16: FCCRR16_ins<"slt">; // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison.
// -def SltuRxRy16: FRR16_ins<0b00011, "sltu", IIAlu>{ +def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{ let Defs = [T8]; } -- cgit v1.1 From c5a6349ae84309534e0ade8c7c7ddada808e7729 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 24 Feb 2013 00:27:26 +0000 Subject: TwoAddrInstructionPass::tryInstructionTransform() has a case where it calls itself recursively with a new instruction that has not been finalized, in order to determine whether to keep the instruction. On 'make check' and test-suite the only cases where the recursive invocation made any transformations were simple instruction commutations, so I am restricting the recursive invocation to do only this. The other cases wouldn't work correctly when updating LiveIntervals, since the new instructions don't have slot indices and LiveIntervals hasn't yet been updated. If the other transformations were actually triggering in any test case it would be possible to support it with a lot of effort, but since they don't it's not worth it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175979 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 2ed6be4..34946ec 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -120,7 +120,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist); + unsigned Dist, bool shouldOnlyCommute); void scanUses(unsigned DstReg); @@ -1085,11 +1085,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, /// either eliminate the tied operands or improve the opportunities for /// coalescing away the register copy. Returns true if no copy needs to be /// inserted to untie mi's operands (either because they were untied, or -/// because mi was rescheduled, and will be visited again later). +/// because mi was rescheduled, and will be visited again later). If the +/// shouldOnlyCommute flag is true, only instruction commutation is attempted. bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist, bool shouldOnlyCommute) { if (OptLevel == CodeGenOpt::None) return false; @@ -1138,6 +1140,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + if (shouldOnlyCommute) + return false; + // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. if (rescheduleMIBelowKill(mi, nmi, regB)) { @@ -1214,7 +1219,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = - tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist); + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. 
Keep the unfolded @@ -1539,7 +1544,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && - tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) { + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { // The tied operands have been eliminated or shifted further down the // block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); -- cgit v1.1 From eb1b7254cf1ce480c423c2ae0ff1b68c6ada6180 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 24 Feb 2013 00:27:29 +0000 Subject: TwoAddressInstructionPass::tryInstructionTransform() only potentially returns true when shouldOnlyCommute is false, so we can remove code that checks otherwise. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175980 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 34946ec..fe7a7d7 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1218,10 +1218,11 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); MachineBasicBlock::iterator NewMI = NewMIs[1]; - bool TransformSuccess = + bool TransformResult = tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); - if (TransformSuccess || - NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + assert(!TransformResult && + "tryInstructionTransform() should return false."); + if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. if (LV) { @@ -1272,8 +1273,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } mi = NewMIs[1]; - if (TransformSuccess) - return true; } else { // Transforming didn't eliminate the tie and didn't lead to an // improvement. Clean up the unfolded instructions and keep the -- cgit v1.1 From cc6137e30b40ddf3cdbbb6ebbe01e5b5c76da2e2 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 24 Feb 2013 01:26:05 +0000 Subject: Add a use of an otherwise unused variable to remove a warning in non-Asserts builds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index fe7a7d7..26c5fe4 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1220,6 +1220,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformResult = tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); + (void)TransformResult; assert(!TransformResult && "tryInstructionTransform() should return false."); if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) { -- cgit v1.1 From 459d35cb7975804048684261f2358eedbd2209c1 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sun, 24 Feb 2013 06:16:39 +0000 Subject: Make pseudo FEXT_T8I816_ins a custom inserter.
It should be expanded as early as possible, which means during instruction selection. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 22 ---------------------- lib/Target/Mips/Mips16InstrInfo.td | 1 + lib/Target/Mips/MipsISelLowering.cpp | 30 ++++++++++++++++++++++++++++++ lib/Target/Mips/MipsISelLowering.h | 3 +++ 4 files changed, 34 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index eacc8fc..c04d49e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -135,48 +135,26 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch(MI->getDesc().getOpcode()) { default: return false; - case Mips::BteqzT8CmpX16: - ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::CmpRxRy16); - break; case Mips::BteqzT8CmpiX16: ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16); break; - case Mips::BteqzT8SltX16: - ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltRxRy16); - break; case Mips::BteqzT8SltiX16: ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16); break; - case Mips::BteqzT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - ExpandFEXT_T8I816_ins(MBB, MI, Mips::BteqzX16, Mips::SltuRxRy16); - break; case Mips::BteqzT8SltiuX16: ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16); break; - case Mips::BtnezT8CmpX16: - ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::CmpRxRy16); - break; case Mips::BtnezT8CmpiX16: ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16); break; - case Mips::BtnezT8SltX16: - ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltRxRy16); - break; case Mips::BtnezT8SltiX16: ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16); break; - case Mips::BtnezT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether.
- ExpandFEXT_T8I816_ins(MBB, MI, Mips::BtnezX16, Mips::SltuRxRy16); - break; case Mips::BtnezT8SltiuX16: ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16); diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 1a50faf..01be7e7 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -225,6 +225,7 @@ class FEXT_T8I816_ins: !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t", !strconcat(asmstr, "\t$imm"))),[]> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 68fa28b..1a0d97b 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1429,6 +1429,20 @@ MachineBasicBlock *MipsTargetLowering::EmitSeliT16 } + +MachineBasicBlock + *MipsTargetLowering::EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1568,6 +1582,22 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); case Mips::SelTBtneZSltu: return EmitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2896de5..ac05c83 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -413,6 +413,9 @@ namespace llvm { MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; }; } -- cgit v1.1 From f8db4478f80ef63cf51518affc7b97c9d310bc23 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 24 Feb 2013 07:09:35 +0000 Subject: Revert r169638 because it broke Mesa llvmpipe tests. Fix PR15239. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175985 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2315ac7..1543e23 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16248,11 +16248,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, DebugLoc DL = N->getDebugLoc(); - // We are going to replace the AND, OR, NAND with either BLEND - // or PSIGN, which only look at the MSB. The VSRAI instruction - // does not affect the highest bit, so we can get rid of it. - Mask = Mask.getOperand(0); - // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. // psign = x.type == y.type == mask.type && y = sub(0, x); @@ -16261,7 +16256,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); - Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask); + Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } // PBLENDVB only available on SSE 4.1 -- cgit v1.1 From f4aa644dffebcce0545ca583f2d9559a359d8e6c Mon Sep 17 00:00:00 2001 From: Francois Pichet Date: Sun, 24 Feb 2013 12:34:13 +0000 Subject: Typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175991 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrFormats.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 8cb770e..cb93471 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -21,7 +21,7 @@ class A64Inst patterns, InstrItinClass itin> : Instruction { // All A64 instructions are 32-bit. This field will be filled in - // graually going down the hierarchy. + // gradually going down the hierarchy. field bits<32> Inst; field bits<32> Unpredictable = 0; -- cgit v1.1 From e8aa36a4afa02dde89e10f39b6ca87cfe1949dd8 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 24 Feb 2013 15:34:43 +0000 Subject: CVP: If we have a PHI with an incoming select, try to skip the select. This is a common pattern with dyn_cast and similar constructs; when the PHI no longer depends on the select, it can often be turned into a simpler construct or even get hoisted out of the loop. PR15340.
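To make the shape concrete, a small illustrative C++ function (hypothetical, not taken from the patch or its tests) that lowers to a select feeding a PHI of the kind handled by the processPHI() change below:

    int merge(bool c, int a, int b) {
      int s = c ? a : 0;  // IR: %s = select i1 %c, i32 %a, i32 0
      int r;              //     (dyn_cast produces the same shape:
      if (s != 0)         //      select(isa<T>(v), v-as-T, null))
        r = s;            // on this edge LVI proves %s != 0
      else
        r = b;
      return r;           // IR: %r = phi i32 [ %s, %then ], [ %b, %else ]
    }
    // Since the select's constant (false) arm cannot reach the PHI along the
    // %then edge, CVP rewrites that incoming value from %s to %a, after which
    // the select is often dead or hoistable.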
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175995 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/CorrelatedValuePropagation.cpp | 29 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 4c3631b..995782e 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -21,6 +21,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -97,12 +99,29 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Value *Incoming = P->getIncomingValue(i); if (isa(Incoming)) continue; - Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), - P->getIncomingBlock(i), - BB); - if (!C) continue; + Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB); - P->setIncomingValue(i, C); + // Look if the incoming value is a select with a constant but LVI tells us + // that the incoming value can never be that constant. In that case replace + // the incoming value with the other value of the select. This often allows + // us to remove the select later. + if (!V) { + SelectInst *SI = dyn_cast(Incoming); + if (!SI) continue; + + Constant *C = dyn_cast(SI->getFalseValue()); + if (!C) continue; + + if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, + P->getIncomingBlock(i), BB) != + LazyValueInfo::False) + continue; + + DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n'); + V = SI->getTrueValue(); + } + + P->setIncomingValue(i, V); Changed = true; } -- cgit v1.1 From 4edd84da1b3f7fd73e96a13b6b7e183ad04ac7c4 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Sun, 24 Feb 2013 17:34:50 +0000 Subject: Fix PR14364. This removes a const_cast hack from PPCRegisterInfo::hasReservedSpillSlot(). The proper place to save the frame index for the CR spill slot is in the PPCFunctionInfo object, not the PPCRegisterInfo object. No new test cases, as this just reimplements existing functionality. Existing tests such as test/CodeGen/PowerPC/crsave.ll are sufficient.
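The ownership rule behind the fix, as a sketch (names are from the patch below; the function body is abbreviated to just the access path): per-function state such as a frame index belongs in the function's MachineFunctionInfo, not in the target's long-lived register-info object, so the query side needs no mutable members and no const_cast.

    bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
                                               unsigned Reg,
                                               int &FrameIdx) const {
      // ... 32-bit SVR4, Reg is one of CR2/CR3/CR4 ...
      const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      FrameIdx = FI->getCRSpillFrameIndex(); // slot was created up front in
                                             // processFunctionBeforeCalleeSavedScan()
      return true;
    }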
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175998 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 13 ++++++++++++- lib/Target/PowerPC/PPCMachineFunctionInfo.h | 9 ++++++++- lib/Target/PowerPC/PPCRegisterInfo.cpp | 24 +++++++----------------- lib/Target/PowerPC/PPCRegisterInfo.h | 1 - 4 files changed, 27 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index cc1ed69..6dfb4c8 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -786,7 +786,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MF.getRegInfo().setPhysRegUnused(LR); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.setPhysRegUnused(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -811,6 +812,16 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } + // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the + // function uses CR 2, 3, or 4. + if (!isPPC64 && !isDarwinABI && + (MRI.isPhysRegUsed(PPC::CR2) || + MRI.isPhysRegUsed(PPC::CR3) || + MRI.isPhysRegUsed(PPC::CR4))) { + int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + FI->setCRSpillFrameIndex(FrameIdx); + } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 24caffa..045b375 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -71,6 +71,9 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// register for parameter passing. unsigned VarArgsNumFPR; + /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. + int CRSpillFrameIndex; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -83,7 +86,8 @@ public: VarArgsFrameIndex(0), VarArgsStackOffset(0), VarArgsNumGPR(0), - VarArgsNumFPR(0) {} + VarArgsNumFPR(0), + CRSpillFrameIndex(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -125,6 +129,9 @@ public: unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } + void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 9745235..df245cc 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 
0 : 1), - Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { + Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -111,11 +111,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; - // For 32-bit SVR4, also initialize the frame index associated with - // the CR spill slot. - if (!Subtarget.isPPC64()) - CRSpillFrameIdx = 0; - return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; } @@ -450,19 +445,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 - // is arbitrary and will be subsequently ignored. For 32-bit, we must - // create exactly one stack slot and return its FrameIdx for all - // nonvolatiles. + // is arbitrary and will be subsequently ignored. For 32-bit, we have + // previously created the stack slot if needed, so return its FrameIdx. if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { - if (Subtarget.isPPC64()) { + if (Subtarget.isPPC64()) FrameIdx = 0; - } else if (CRSpillFrameIdx) { - FrameIdx = CRSpillFrameIdx; - } else { - MachineFrameInfo *MFI = - (const_cast(MF)).getFrameInfo(); - FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); - CRSpillFrameIdx = FrameIdx; + else { + const PPCFunctionInfo *FI = MF.getInfo(); + FrameIdx = FI->getCRSpillFrameIndex(); } return true; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index c22450e..9840666 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -30,7 +30,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { std::map ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; - mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); -- cgit v1.1 From 29cb2591f9f7ec948e7b0e719b1db6cef99010d0 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sun, 24 Feb 2013 23:17:51 +0000 Subject: Make pseudo FEXT_T8I816_ins into a custom emitter.
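For context, a minimal skeleton of the custom-inserter mechanism this and the neighboring Mips16 commits move onto (a hypothetical target 'Foo' is used here; only the shape matches the Mips16 emitters below). Flagging the pseudo with 'let usesCustomInserter = 1' in the .td file routes it to this hook at instruction selection time, rather than waiting for a post-RA expandPostRAPseudo() pass:

    MachineBasicBlock *
    FooTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                   MachineBasicBlock *BB) const {
      const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
      DebugLoc DL = MI->getDebugLoc();
      switch (MI->getOpcode()) {
      default:
        llvm_unreachable("unexpected pseudo for custom insertion");
      case Foo::CMPBR_PSEUDO: {
        // Expand compare-and-branch into its two real instructions in front
        // of the pseudo; because this runs before register allocation, the
        // def and use of the implicit condition register are visible.
        unsigned LHS = MI->getOperand(0).getReg();
        unsigned RHS = MI->getOperand(1).getReg();
        MachineBasicBlock *Target = MI->getOperand(2).getMBB();
        BuildMI(*BB, MI, DL, TII->get(Foo::CMP)).addReg(LHS).addReg(RHS);
        BuildMI(*BB, MI, DL, TII->get(Foo::BR)).addMBB(Target);
        break;
      }
      }
      MI->eraseFromParent(); // the pseudo itself is gone now
      return BB;
    }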
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176002 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 53 ------------------------------------ lib/Target/Mips/Mips16InstrInfo.h | 8 ------ lib/Target/Mips/Mips16InstrInfo.td | 1 + lib/Target/Mips/MipsISelLowering.cpp | 35 ++++++++++++++++++++++++ lib/Target/Mips/MipsISelLowering.h | 3 ++ 5 files changed, 39 insertions(+), 61 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index c04d49e..3c99b60 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -135,30 +135,6 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch(MI->getDesc().getOpcode()) { default: return false; - case Mips::BteqzT8CmpiX16: - ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, - Mips::CmpiRxImm16, Mips::CmpiRxImmX16); - break; - case Mips::BteqzT8SltiX16: - ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, - Mips::SltiRxImm16, Mips::SltiRxImmX16); - break; - case Mips::BteqzT8SltiuX16: - ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BteqzX16, - Mips::SltiuRxImm16, Mips::SltiuRxImmX16); - break; - case Mips::BtnezT8CmpiX16: - ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, - Mips::CmpiRxImm16, Mips::CmpiRxImmX16); - break; - case Mips::BtnezT8SltiX16: - ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, - Mips::SltiRxImm16, Mips::SltiRxImmX16); - break; - case Mips::BtnezT8SltiuX16: - ExpandFEXT_T8I8I16_ins(MBB, MI, Mips::BtnezX16, - Mips::SltiuRxImm16, Mips::SltiuRxImmX16); - break; case Mips::RetRA16: ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; @@ -435,35 +411,6 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); } - -void Mips16InstrInfo::ExpandFEXT_T8I816_ins( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned BtOpc, unsigned CmpOpc) const { - unsigned regX = I->getOperand(0).getReg(); - unsigned regY = I->getOperand(1).getReg(); - MachineBasicBlock *target = I->getOperand(2).getMBB(); - BuildMI(MBB, I, I->getDebugLoc(), get(CmpOpc)).addReg(regX).addReg(regY); - BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target); - -} - -void Mips16InstrInfo::ExpandFEXT_T8I8I16_ins( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc) const { - unsigned regX = I->getOperand(0).getReg(); - int64_t imm = I->getOperand(1).getImm(); - MachineBasicBlock *target = I->getOperand(2).getMBB(); - unsigned CmpOpc; - if (isUInt<8>(imm)) - CmpOpc = CmpiOpc; - else if (isUInt<16>(imm)) - CmpOpc = CmpiXOpc; - else - llvm_unreachable("immediate field not usable"); - BuildMI(MBB, I, I->getDebugLoc(), get(CmpOpc)).addReg(regX).addImm(imm); - BuildMI(MBB, I, I->getDebugLoc(), get(BtOpc)).addMBB(target); -} - void Mips16InstrInfo::ExpandFEXT_CCRX16_ins( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SltOpc) const { diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 0048fff..c01e9ca 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -115,14 +115,6 @@ private: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandFEXT_T8I816_ins(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned BtOpc, unsigned CmpOpc) const; - - void ExpandFEXT_T8I8I16_ins( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc) const; - void 
ExpandFEXT_CCRX16_ins( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SltOpc) const; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 01be7e7..e11b1a7 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -237,6 +237,7 @@ class FEXT_T8I8I16_ins: !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t", !strconcat(asmstr, "\t$targ"))), []> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1a0d97b..3c54e18 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1443,6 +1443,28 @@ MachineBasicBlock MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } + + +MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1598,6 +1620,19 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // TBD: figure out a way to get this or remove the instruction // altogether. 
return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpiX16: return EmitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BteqzT8SltiX16: return EmitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BteqzT8SltiuX16: return EmitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::BtnezT8CmpiX16: return EmitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BtnezT8SltiX16: return EmitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BtnezT8SltiuX16: return EmitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + break; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index ac05c83..2d1e9a9 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -416,6 +416,9 @@ namespace llvm { MachineBasicBlock *EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; }; } -- cgit v1.1 From 6172f0298391e00cb669cc246e70ae2531f2cdec Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 25 Feb 2013 01:07:18 +0000 Subject: DIBuilder: support structs with vtable pointers in the same way as classes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176004 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DIBuilder.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index bab30e4..f31e531 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -508,11 +508,15 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, } /// createStructType - Create debugging information entry for a struct. -DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - unsigned Flags, DIArray Elements, - unsigned RunTimeLang) { +DICompositeType DIBuilder::createStructType(DIDescriptor Context, + StringRef Name, DIFile File, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + unsigned Flags, DIType DerivedFrom, + DIArray Elements, + unsigned RunTimeLang, + MDNode *VTableHolder) { // TAG_structure_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), @@ -524,13 +528,13 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - NULL, + DerivedFrom, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - NULL, + VTableHolder, NULL, }; - return DIType(MDNode::get(VMContext, Elts)); + return DICompositeType(MDNode::get(VMContext, Elts)); } /// createUnionType - Create debugging information entry for an union. -- cgit v1.1 From de89ecd011c453108c7641f44360f3a93af90206 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 25 Feb 2013 02:25:47 +0000 Subject: Make pseudos FEXT_CCRX16_ins and FEXT_CCRXI16_ins into custom emitters. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176007 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.cpp | 55 -------------------------------- lib/Target/Mips/Mips16InstrInfo.h | 14 --------- lib/Target/Mips/Mips16InstrInfo.td | 2 ++ lib/Target/Mips/MipsISelLowering.cpp | 61 ++++++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsISelLowering.h | 7 +++++ 5 files changed, 70 insertions(+), 69 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 3c99b60..fd3cc8f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -138,18 +138,6 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::RetRA16: ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; - case Mips::SltCCRxRy16: - ExpandFEXT_CCRX16_ins(MBB, MI, Mips::SltRxRy16); - break; - case Mips::SltiCCRxImmX16: - ExpandFEXT_CCRXI16_ins(MBB, MI, Mips::SltiRxImm16, Mips::SltiRxImmX16); - break; - case Mips::SltiuCCRxImmX16: - ExpandFEXT_CCRXI16_ins(MBB, MI, Mips::SltiuRxImm16, Mips::SltiuRxImmX16); - break; - case Mips::SltuCCRxRy16: - ExpandFEXT_CCRX16_ins(MBB, MI, Mips::SltuRxRy16); - break; } MBB.erase(MI); @@ -411,29 +399,6 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); } -void Mips16InstrInfo::ExpandFEXT_CCRX16_ins( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SltOpc) const { - unsigned CC = I->getOperand(0).getReg(); - unsigned regX = I->getOperand(1).getReg(); - unsigned regY = I->getOperand(2).getReg(); - BuildMI(MBB, I, I->getDebugLoc(), get(SltOpc)).addReg(regX).addReg(regY); - BuildMI(MBB, I, I->getDebugLoc(), - get(Mips::MoveR3216), CC).addReg(Mips::T8); - -} -void Mips16InstrInfo::ExpandFEXT_CCRXI16_ins( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SltiOpc, unsigned SltiXOpc) const { - unsigned CC = I->getOperand(0).getReg(); - unsigned regX = I->getOperand(1).getReg(); - int64_t Imm = I->getOperand(2).getImm(); - unsigned SltOpc = whichOp8u_or_16simm(SltiOpc, SltiXOpc, Imm); - BuildMI(MBB, I, I->getDebugLoc(), get(SltOpc)).addReg(regX).addImm(Imm); - BuildMI(MBB, I, I->getDebugLoc(), - get(Mips::MoveR3216), CC).addReg(Mips::T8); - -} const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { if (validSpImm8(Imm)) @@ -448,26 +413,6 @@ void Mips16InstrInfo::BuildAddiuSpImm BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm); } -unsigned Mips16InstrInfo::whichOp8_or_16uimm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isUInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - -unsigned Mips16InstrInfo::whichOp8u_or_16simm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { return new Mips16InstrInfo(TM); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index c01e9ca..1cb1dfe 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -115,20 +115,6 @@ private: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void ExpandFEXT_CCRX16_ins( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SltOpc) const; - - void ExpandFEXT_CCRXI16_ins( - 
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SltiOpc, unsigned SltiXOpc) const; - - static unsigned - whichOp8_or_16uimm (unsigned shortOp, unsigned longOp, int64_t Imm); - - static unsigned - whichOp8u_or_16simm (unsigned shortOp, unsigned longOp, int64_t Imm); - }; } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index e11b1a7..a9e9c52 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -90,6 +90,7 @@ class FEXT_CCRXI16_ins: MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm), !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // JAL and JALX instruction format @@ -138,6 +139,7 @@ class FCCRR16_ins : MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry), !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 3c54e18..36e1a15 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1434,6 +1434,8 @@ MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); unsigned regX = MI->getOperand(0).getReg(); unsigned regY = MI->getOperand(1).getReg(); @@ -1448,6 +1450,8 @@ MachineBasicBlock MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins( unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); unsigned regX = MI->getOperand(0).getReg(); int64_t imm = MI->getOperand(1).getImm(); @@ -1465,6 +1469,51 @@ MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins( return BB; } + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} +MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1633,6 +1682,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::BtnezT8SltiuX16: return EmitFEXT_T8I8I16_ins( Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); break; + case Mips::SltCCRxRy16: + return EmitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); + break; + case Mips::SltiCCRxImmX16: + return EmitFEXT_CCRXI16_ins + (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::SltiuCCRxImmX16: + return EmitFEXT_CCRXI16_ins + (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SltuCCRxRy16: + return EmitFEXT_CCRX16_ins + (Mips::SltuRxRy16, MI, BB); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2d1e9a9..f0f3782 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -419,6 +419,13 @@ namespace llvm { MachineBasicBlock *EmitFEXT_T8I8I16_ins( unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; } -- cgit v1.1 From af23f8e403d68e3f96eb5eb63e50e3aec4ea01c9 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 25 Feb 2013 14:20:21 +0000 Subject: Fix the root cause of PR15348 by correctly handling alignment 0 on memory intrinsics in the SDAG builder. When alignment is zero, the lang ref says that *no* alignment assumptions can be made. This is the exact opposite of the internal API contracts of the DAG where alignment 0 indicates that the alignment can be made to be anything desired. There is another, more explicit alignment that is better suited for the role of "no alignment at all": an alignment of 1. Map the intrinsic alignment to this early so that we don't end up generating aligned DAGs. It is really terrifying that we've never seen this before, but we suddenly started generating a large number of alignment 0 memcpys due to the new code to do memcpy-based copying of POD class members. That patch contains a bug that rounds bitfield alignments down when they are the first field. This can in turn produce zero alignments. This fixes weird crashes I've seen in library users of LLVM on 32-bit hosts, etc. 
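The normalization itself is small; schematically (abbreviated from the visitIntrinsicCall() changes below):

    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
    if (!Align)
      Align = 1; // IR alignment 0 ("assume nothing") maps to DAG alignment 1;
                 // DAG alignment 0 would instead mean "pick any alignment".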
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176022 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0640311..81b0ea7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3867,6 +3867,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reservers 0"); // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3934,6 +3935,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reservers 0"); // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3988,6 +3990,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reservers 0"); // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3a55696..3a6f9b2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4467,6 +4467,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), @@ -4483,6 +4485,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); @@ -4500,6 +4504,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 
bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), -- cgit v1.1 From 7e6ffac9ab0af2a54036c978822ad40492246622 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 25 Feb 2013 14:29:38 +0000 Subject: Fix spelling noticed by Duncan. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176023 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 81b0ea7..db8ae6e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3867,7 +3867,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reservers 0"); + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3935,7 +3935,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reservers 0"); + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3990,7 +3990,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reservers 0"); + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. -- cgit v1.1 From fc7695a653323071ec141aee994e4188592ad1f5 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 25 Feb 2013 16:44:35 +0000 Subject: Fix missing relocation for TLS addressing peephole optimization. Report and fix due to Kai Nacke. Testcase update by me. 
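The hunk below adds the missing case. A hedged sketch of the mapping (an illustrative stand-in, not the LLVM API; the real switch lives in PPCELFObjectWriter::getRelocTypeInner): a @tprel@l operand on a toc16 fixup must resolve to the thread-local relocation instead of falling into the "Unsupported Modifier" unreachable:

    #include <iostream>
    #include <string>

    enum class Modifier { TPREL16_LO, DTPREL16_LO };

    // Names mirror the patch; numeric relocation values are elided on purpose.
    std::string relocForToc16(Modifier M) {
      switch (M) {
      case Modifier::TPREL16_LO:  return "R_PPC64_TPREL16_LO";  // added by this fix
      case Modifier::DTPREL16_LO: return "R_PPC64_DTPREL16_LO"; // pre-existing
      }
      return "unsupported"; // previously the TPREL case ended up here
    }

    int main() { std::cout << relocForToc16(Modifier::TPREL16_LO) << '\n'; }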
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176029 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib')

diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index ad41f3a..61868d4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -153,6 +153,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
     case PPC::fixup_ppc_toc16:
       switch (Modifier) {
       default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+        Type = ELF::R_PPC64_TPREL16_LO;
+        break;
       case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
         Type = ELF::R_PPC64_DTPREL16_LO;
         break;
--
cgit v1.1


From 50e75bfc29269def44981ab5f109334d95f55007 Mon Sep 17 00:00:00 2001
From: Matt Beaumont-Gay
Date: Mon, 25 Feb 2013 18:11:18 +0000
Subject: 'Hexadecimal' has two 'a's and only one 'i'.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176031 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ++--
 lib/MC/MCParser/AsmLexer.cpp                     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3a6f9b2..b8ab2a9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3663,7 +3663,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
 ///
 ///   Op = (Op & 0x007fffff) | 0x3f800000;
 ///
-/// where Op is the hexidecimal representation of floating point value.
+/// where Op is the hexadecimal representation of floating point value.
 static SDValue
 GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
@@ -3677,7 +3677,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
 ///
 ///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
 ///
-/// where Op is the hexidecimal representation of floating point value.
+/// where Op is the hexadecimal representation of floating point value.
 static SDValue
 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
             DebugLoc dl) {
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 530e94e..86a9674 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -282,7 +282,7 @@ AsmToken AsmLexer::LexDigit() {
                     (int64_t)Result);
   }
 
-  // Either octal or hexidecimal.
+  // Either octal or hexadecimal.
   long long Value;
   unsigned Radix = doLookAhead(CurPtr, 8);
   bool isHex = Radix == 16;
--
cgit v1.1


From dca83187b7c4465ad6ff8507052223d31c0ea66a Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Mon, 25 Feb 2013 19:06:27 +0000
Subject: [ms-inline asm] Add support for the pushad/popad mnemonics.
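For readers unfamiliar with the {a|b} AsmString syntax in the X86InstrInfo.td hunk below: the text before '|' is the AT&T spelling and the text after is the Intel spelling, so widening "popa{l}" to "popa{l|d}" is what teaches the Intel-dialect (MS inline asm) matcher the popad/pushad mnemonics. A rough, self-contained sketch of that selection rule (an illustration under that assumption, not LLVM's actual parser):

    #include <string>

    // Resolve one "{att|intel}" group; groups without '|' print as-is.
    std::string selectVariant(const std::string &Tmpl, bool Intel) {
      std::string Out;
      for (std::size_t i = 0; i < Tmpl.size();) {
        if (Tmpl[i] != '{') { Out += Tmpl[i++]; continue; }
        std::size_t End = Tmpl.find('}', i);
        std::string Body = Tmpl.substr(i + 1, End - i - 1); // e.g. "l|d"
        std::size_t Bar = Body.find('|');
        if (Bar == std::string::npos)
          Out += Body;                                   // "{l}" -> "l"
        else
          Out += Intel ? Body.substr(Bar + 1) : Body.substr(0, Bar);
        i = End + 1;
      }
      return Out; // selectVariant("popa{l|d}", true) == "popad"
    }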
rdar://13254235 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176036 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 84c278c..d989ec7 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -870,16 +870,14 @@ let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; - - let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad=1, neverHasSideEffects=1 in { -def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>, +def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], mayStore=1, neverHasSideEffects=1 in { -def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>, +def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -- cgit v1.1 From 029f4fd2ff539ed143b83c140349df2c064965d2 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Mon, 25 Feb 2013 19:11:48 +0000 Subject: pre-RA-sched fix: only reevaluate physreg interferences when necessary. Fixes rdar:13279013: scheduler was blowing up on select instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176037 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 94 +++++++++++++++++--------- 1 file changed, 61 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 10d1adf..addfccb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -143,6 +143,12 @@ private: std::vector LiveRegDefs; std::vector LiveRegGens; + // Collect interferences between physical register use/defs. + // Each interference is an SUnit and set of physical registers. + SmallVector Interferences; + typedef DenseMap > LRegsMapT; + LRegsMapT LRegsMap; + /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; @@ -226,6 +232,8 @@ private: SmallVector&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + void releaseInterferences(unsigned Reg = 0); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -322,6 +330,7 @@ void ScheduleDAGRRList::Schedule() { LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. 
BuildSchedGraph(NULL); @@ -735,6 +744,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } // Release the special call resource dependence, if this is the beginning @@ -749,6 +759,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -804,6 +815,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } @@ -831,6 +843,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -1315,34 +1328,58 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { return !LRegs.empty(); } +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = Interferences.size(); i > 0; --i) { + SUnit *SU = Interferences[i-1]; + LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); + if (Reg) { + SmallVector &LRegs = LRegsPos->second; + if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + continue; + } + SU->isPending = false; + // The interfering node may no longer be available due to backtracking. + // Furthermore, it may have been made available again, in which case it is + // now already in the AvailableQueue. + if (SU->isAvailable && !SU->NodeQueueId) { + DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + AvailableQueue->push(SU); + } + if (i < Interferences.size()) + Interferences[i-1] = Interferences.back(); + Interferences.pop_back(); + LRegsMap.erase(LRegsPos); + } +} + /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SmallVector Interferences; - DenseMap > LRegsMap; - - SUnit *CurSU = AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); while (CurSU) { SmallVector LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - Interferences.push_back(CurSU); + DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0]) + << " SU #" << CurSU->NodeNum << '\n'); + std::pair LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Intereferences are pending"); + // Update the interference with current live regs. + LRegsPair.first->second = LRegs; + } CurSU = AvailableQueue->pop(); } - if (CurSU) { - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - assert(Interferences[i]->isAvailable && "must still be available"); - AvailableQueue->push(Interferences[i]); - } + if (CurSU) return CurSU; - } // All candidates are delayed due to live physical reg dependencies. 
// Try backtracking, code duplication, or inserting cross class copies @@ -1363,6 +1400,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { } } if (!WillCreateCycle(TrySU, BtSU)) { + // BacktrackBottomUp mutates Interferences! BacktrackBottomUp(TrySU, BtSU); // Force the current node to be scheduled before the node that @@ -1372,19 +1410,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } + DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" + << TrySU->NodeNum << ")\n"); AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) { + // node is no longer available. + if (!TrySU->isAvailable) CurSU = AvailableQueue->pop(); - } else { + AvailableQueue->remove(TrySU); CurSU = TrySU; - TrySU->isPending = false; - Interferences.erase(Interferences.begin()+i); } + // Interferences has been mutated. We must break. break; } } @@ -1435,17 +1473,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TrySU->isAvailable = false; CurSU = NewDef; } - assert(CurSU && "Unable to resolve live physical register dependencies!"); - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - // May no longer be available due to backtracking. - if (Interferences[i]->isAvailable) { - AvailableQueue->push(Interferences[i]); - } - } return CurSU; } @@ -1466,7 +1494,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { + while (!AvailableQueue->empty() || !Interferences.empty()) { DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); -- cgit v1.1 From fd3417d288c22673ec6d76dc4695989bb544373f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 25 Feb 2013 21:59:35 +0000 Subject: [fast-isel] Add X86FastIsel::FastLowerArguments to handle functions with 6 or fewer scalar integer (i32 or i64) arguments. It completely eliminates the need for SDISel for trivial functions. Also, add the new llc -fast-isel-abort-args option, which is similar to -fast-isel-abort option, but for formal argument lowering. 
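For context, a hedged illustration (not from the patch) of what now stays entirely in fast-isel: every argument is a scalar i32/i64 in one of the first six integer parameter registers, so lowering is a handful of register copies:

    // Qualifies on x86-64: <= 6 scalar integer args, C calling convention,
    // no byval/inreg/sret/nest attributes -- handled by FastLowerArguments.
    long sum(long a, long b, int c) { return a + b + c; }

    // Would NOT qualify (aggregate argument), so lowering falls back to
    // SDISel; under "llc -O0 -fast-isel-abort-args" that fallback becomes
    // a hard abort, which is the debugging aid this patch adds.
    struct Pair { long x, y; };
    long first(Pair p) { return p.x; }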
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176052 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 10 ++++ lib/Target/X86/X86FastISel.cpp | 73 +++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index a598ec4..1c92f8b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -145,6 +145,10 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, static cl::opt EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt +EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction fails to " + "lower formal arguments")); static cl::opt UseMBPI("use-mbpi", @@ -1048,6 +1052,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (LLVMBB == &Fn.getEntryBlock()) { // Lower any arguments needed in this block if this is the entry block. if (!FastIS->LowerArguments()) { + + if (EnableFastISelAbortArgs) + // The "fast" selector couldn't lower these arguments. For the + // purpose of debugging, just abort. + llvm_unreachable("FastISel didn't lower all arguments"); + // Call target indepedent SDISel argument lowering code if the target // specific routine is not successful. LowerArguments(LLVMBB); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 4e5430d..ed17b11 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -75,6 +75,8 @@ public: virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); + virtual bool FastLowerArguments(); + #include "X86GenFastISel.inc" private: @@ -1520,6 +1522,77 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { } } +bool X86FastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::C) + return false; + + if (!Subtarget->is64Bit()) + return false; + + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 
+ unsigned Idx = 1; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (Idx > 6) + return false; + + if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || + F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + return false; + + Type *ArgTy = I->getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + switch (ArgVT.getSimpleVT().SimpleTy) { + case MVT::i32: + case MVT::i64: + break; + default: + return false; + } + } + + static const uint16_t GPR32ArgRegs[] = { + X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D + }; + static const uint16_t GPR64ArgRegs[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 + }; + + Idx = 0; + const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (I->use_empty()) + continue; + bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32; + const TargetRegisterClass *RC = is32Bit ? RC32 : RC64; + unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx]; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(DstReg, getKillRegState(true)); + UpdateValueMap(I, ResultReg); + } + return true; +} + bool X86FastISel::X86SelectCall(const Instruction *I) { const CallInst *CI = cast(I); const Value *Callee = CI->getCalledValue(); -- cgit v1.1 From ffa1dbadaf73d5b80a2bdf421c16cba29aea80a3 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 25 Feb 2013 22:20:00 +0000 Subject: Fix wording. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176055 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1c92f8b..acae58c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -144,11 +144,12 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, "instruction selector")); static cl::opt EnableFastISelAbort("fast-isel-abort", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction fails")); + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction")); static cl::opt EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction fails to " - "lower formal arguments")); + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower a formal argument")); static cl::opt UseMBPI("use-mbpi", -- cgit v1.1 From 5e6e15caa79c30e155665e58b81540ec15e52051 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 25 Feb 2013 23:01:03 +0000 Subject: Fix PR10499 - Check whether SSE is available before lowering all 1s vector building with PCMPEQD, which is only available from SSE2 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176058 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1543e23..57014dd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5344,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. - if (ISD::isBuildVectorAllOnes(Op.getNode())) { + if (ISD::isBuildVectorAllOnes(Op.getNode()) && Subtarget->hasSSE2()) { if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256())) return Op; -- cgit v1.1 From d09318f9013aad79d7872de2ba490734b597f4d4 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 25 Feb 2013 23:16:36 +0000 Subject: Refine fix to PR10499, no functionality change - Put expensive checking after simple one git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176060 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 57014dd..fb33520 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5344,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. 
- if (ISD::isBuildVectorAllOnes(Op.getNode()) && Subtarget->hasSSE2()) { + if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256())) return Op; -- cgit v1.1 From fe88aa0d148510e41bc3080dea4febcb1445855c Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 26 Feb 2013 01:05:31 +0000 Subject: [fast-isel] Make sure the FastLowerArguments function checks to make sure the arguments type is a simple type. rdar://13290455 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176066 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 1 + lib/Target/X86/X86FastISel.cpp | 1 + 2 files changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index b545dbc..29fcd40 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2922,6 +2922,7 @@ bool ARMFastISel::FastLowerArguments() { return false; EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i8: case MVT::i16: diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ed17b11..b5c3270 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1555,6 +1555,7 @@ bool X86FastISel::FastLowerArguments() { return false; EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i32: case MVT::i64: -- cgit v1.1 From 70cdcd5114b30c4983ff158278422ea129bd27bb Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 26 Feb 2013 01:30:05 +0000 Subject: [mips] Use class RegDefsUses to track register defs and uses. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176070 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDelaySlotFiller.cpp | 171 +++++++++++++++----------------- 1 file changed, 82 insertions(+), 89 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 6b25d2d..d62b166 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -44,6 +44,23 @@ static cl::opt SkipDelaySlotFiller( cl::Hidden); namespace { + class RegDefsUses { + public: + RegDefsUses(TargetMachine &TM); + void init(const MachineInstr &MI); + bool update(const MachineInstr &MI, unsigned Begin, unsigned End); + + private: + bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, + bool IsDef) const; + + /// Returns true if Reg or its alias is in RegSet. + bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; + + const TargetRegisterInfo &TRI; + BitVector Defs, Uses; + }; + class Filler : public MachineFunctionPass { public: Filler(TargetMachine &tm) @@ -70,26 +87,11 @@ namespace { bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - /// Initialize RegDefs and RegUses. 
- void initRegDefsUses(const MachineInstr &MI, BitVector &RegDefs, - BitVector &RegUses) const; - - bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; - - bool checkRegDefsUses(const BitVector &RegDefs, const BitVector &RegUses, - BitVector &NewDefs, BitVector &NewUses, - unsigned Reg, bool IsDef) const; - - bool checkRegDefsUses(BitVector &RegDefs, BitVector &RegUses, - const MachineInstr &MI, unsigned Begin, - unsigned End) const; - /// This function checks if it is valid to move Candidate to the delay slot /// and returns true if it isn't. It also updates load and store flags and /// register defs and uses. bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, - bool &SawStore, BitVector &RegDefs, - BitVector &RegUses) const; + bool &SawStore, RegDefsUses &RegDU) const; bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const; @@ -103,6 +105,65 @@ namespace { char Filler::ID = 0; } // end of anonymous namespace +RegDefsUses::RegDefsUses(TargetMachine &TM) + : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false), + Uses(TRI.getNumRegs(), false) {} + +void RegDefsUses::init(const MachineInstr &MI) { + // Add all register operands which are explicit and non-variadic. + update(MI, 0, MI.getDesc().getNumOperands()); + + // If MI is a call, add RA to Defs to prevent users of RA from going into + // delay slot. + if (MI.isCall()) + Defs.set(Mips::RA); + + // Add all implicit register operands of branch instructions except + // register AT. + if (MI.isBranch()) { + update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands()); + Defs.reset(Mips::AT); + } +} + +bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { + BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); + bool HasHazard = false; + + for (unsigned I = Begin; I != End; ++I) { + const MachineOperand &MO = MI.getOperand(I); + + if (MO.isReg() && MO.getReg()) + HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef()); + } + + Defs |= NewDefs; + Uses |= NewUses; + + return HasHazard; +} + +bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, + unsigned Reg, bool IsDef) const { + if (IsDef) { + NewDefs.set(Reg); + // check whether Reg has already been defined or used. + return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg)); + } + + NewUses.set(Reg); + // check whether Reg has already been defined. + return isRegInSet(Defs, Reg); +} + +bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { + // Check Reg and all aliased Registers. + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + if (RegSet.test(*AI)) + return true; + return false; +} + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. 
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { @@ -139,10 +200,9 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, Iter &Filler) const { - unsigned NumRegs = TM.getRegisterInfo()->getNumRegs(); - BitVector RegDefs(NumRegs), RegUses(NumRegs); + RegDefsUses RegDU(TM); - initRegDefsUses(*Slot, RegDefs, RegUses); + RegDU.init(*Slot); bool SawLoad = false; bool SawStore = false; @@ -155,7 +215,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, if (terminateSearch(*I)) break; - if (delayHasHazard(*I, SawLoad, SawStore, RegDefs, RegUses)) + if (delayHasHazard(*I, SawLoad, SawStore, RegDU)) continue; Filler = llvm::next(I).base(); @@ -165,45 +225,8 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, return false; } -bool Filler::checkRegDefsUses(const BitVector &RegDefs, - const BitVector &RegUses, - BitVector &NewDefs, BitVector &NewUses, - unsigned Reg, bool IsDef) const { - if (IsDef) { - NewDefs.set(Reg); - // check whether Reg has already been defined or used. - return (isRegInSet(RegDefs, Reg) || isRegInSet(RegUses, Reg)); - } - - NewUses.set(Reg); - // check whether Reg has already been defined. - return isRegInSet(RegDefs, Reg); -} - -bool Filler::checkRegDefsUses(BitVector &RegDefs, BitVector &RegUses, - const MachineInstr &MI, unsigned Begin, - unsigned End) const { - unsigned NumRegs = TM.getRegisterInfo()->getNumRegs(); - BitVector NewDefs(NumRegs), NewUses(NumRegs); - bool HasHazard = false; - - for (unsigned I = Begin; I != End; ++I) { - const MachineOperand &MO = MI.getOperand(I); - - if (MO.isReg() && MO.getReg()) - HasHazard |= checkRegDefsUses(RegDefs, RegUses, NewDefs, NewUses, - MO.getReg(), MO.isDef()); - } - - RegDefs |= NewDefs; - RegUses |= NewUses; - - return HasHazard; -} - bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, - bool &SawStore, BitVector &RegDefs, - BitVector &RegUses) const { + bool &SawStore, RegDefsUses &RegDU) const { bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill()); // Loads or stores cannot be moved past a store to the delay slot @@ -219,41 +242,11 @@ bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, assert((!Candidate.isCall() && !Candidate.isReturn()) && "Cannot put calls or returns in delay slot."); - HasHazard |= checkRegDefsUses(RegDefs, RegUses, Candidate, 0, - Candidate.getNumOperands()); + HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands()); return HasHazard; } -void Filler::initRegDefsUses(const MachineInstr &MI, BitVector &RegDefs, - BitVector &RegUses) const { - // Add all register operands which are explicit and non-variadic. - checkRegDefsUses(RegDefs, RegUses, MI, 0, MI.getDesc().getNumOperands()); - - // If MI is a call, add RA to RegDefs to prevent users of RA from going into - // delay slot. - if (MI.isCall()) - RegDefs.set(Mips::RA); - - // Add all implicit register operands of branch instructions except - // register AT. - if (MI.isBranch()) { - checkRegDefsUses(RegDefs, RegUses, MI, MI.getDesc().getNumOperands(), - MI.getNumOperands()); - RegDefs.reset(Mips::AT); - } -} - -//returns true if the Reg or its alias is in the RegSet. -bool Filler::isRegInSet(const BitVector &RegSet, unsigned Reg) const { - // Check Reg and all aliased Registers. 
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); - AI.isValid(); ++AI) - if (RegSet.test(*AI)) - return true; - return false; -} - bool Filler::terminateSearch(const MachineInstr &Candidate) const { return (Candidate.isTerminator() || Candidate.isCall() || Candidate.isLabel() || Candidate.isInlineAsm() || -- cgit v1.1 From ce522ee0a27062390f13e7ccb53fcff4fc36c473 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Tue, 26 Feb 2013 01:31:59 +0000 Subject: Use a DenseMap instead of a std::map for AnalysisID -> Pass* maps. This reduces the pass-manager overhead from FPPassManager::runOnFunction() by about 10%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176072 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/PassManager.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp index 2bdfd98..5a8df70 100644 --- a/lib/IR/PassManager.cpp +++ b/lib/IR/PassManager.cpp @@ -874,9 +874,9 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { return; const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); - for (std::map::iterator I = AvailableAnalysis.begin(), + for (DenseMap::iterator I = AvailableAnalysis.begin(), E = AvailableAnalysis.end(); I != E; ) { - std::map::iterator Info = I++; + DenseMap::iterator Info = I++; if (Info->second->getAsImmutablePass() == 0 && std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == PreservedSet.end()) { @@ -897,10 +897,10 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { if (!InheritedAnalysis[Index]) continue; - for (std::map::iterator + for (DenseMap::iterator I = InheritedAnalysis[Index]->begin(), E = InheritedAnalysis[Index]->end(); I != E; ) { - std::map::iterator Info = I++; + DenseMap::iterator Info = I++; if (Info->second->getAsImmutablePass() == 0 && std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == PreservedSet.end()) { @@ -960,7 +960,7 @@ void PMDataManager::freePass(Pass *P, StringRef Msg, // listed as the available implementation. const std::vector &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) { - std::map::iterator Pos = + DenseMap::iterator Pos = AvailableAnalysis.find(II[i]->getTypeInfo()); if (Pos != AvailableAnalysis.end() && Pos->second == P) AvailableAnalysis.erase(Pos); @@ -1100,7 +1100,7 @@ void PMDataManager::initializeAnalysisImpl(Pass *P) { Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) { // Check if AvailableAnalysis map has one entry. - std::map::const_iterator I = AvailableAnalysis.find(AID); + DenseMap::const_iterator I = AvailableAnalysis.find(AID); if (I != AvailableAnalysis.end()) return I->second; -- cgit v1.1 From 8eec41fc778e99d42172a7f6de76faa43a6d8847 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 26 Feb 2013 06:58:09 +0000 Subject: Unify clang/llvm attributes for asan/tsan/msan (LLVM part) These are two related changes (one in llvm, one in clang). 
LLVM:
 - rename address_safety => sanitize_address
   (the enum value is the same, so we preserve binary compatibility with
   old bitcode)
 - rename thread_safety => sanitize_thread
 - rename no_uninitialized_checks -> sanitize_memory

CLANG:
 - add __attribute__((no_sanitize_address)) as a synonym for
   __attribute__((no_address_safety_analysis))
 - add __attribute__((no_sanitize_thread))
 - add __attribute__((no_sanitize_memory))

For S in {address, thread, memory}: if -fsanitize=S is present and
__attribute__((no_sanitize_S)) is not set, set the llvm attribute
sanitize_S.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176075 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/MemoryDependenceAnalysis.cpp          |  4 +-
 lib/AsmParser/LLLexer.cpp                          |  6 +-
 lib/AsmParser/LLParser.cpp                         | 94 +++++++++++-----------
 lib/AsmParser/LLToken.h                            |  6 +-
 lib/IR/Attributes.cpp                              | 24 +++---
 lib/IR/Verifier.cpp                                |  6 +-
 .../Instrumentation/AddressSanitizer.cpp           |  4 +-
 7 files changed, 72 insertions(+), 72 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 9a1edc7..38bf5dd 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -287,7 +287,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   // Load widening is hostile to ThreadSanitizer: it may cause false positives
   // or make the reports more cryptic (access sizes are wrong).
   if (LI->getParent()->getParent()->getAttributes().
-      hasAttribute(AttributeSet::FunctionIndex, Attribute::ThreadSafety))
+      hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
     return 0;
 
   // Get the base of this load.
@@ -334,7 +334,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
 
     if (LIOffs+NewLoadByteSize > MemLocEnd &&
         LI->getParent()->getParent()->getAttributes().
-          hasAttribute(AttributeSet::FunctionIndex, Attribute::AddressSafety))
+          hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
       // We will be reading past the location accessed by the original program.
       // While this is safe in a regular build, Address Safety analysis tools
       // may start reporting false warnings. So, don't do widening.
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 2b14559..f46383b 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -562,7 +562,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(attributes); - KEYWORD(address_safety); KEYWORD(alwaysinline); KEYWORD(byval); KEYWORD(inlinehint); @@ -589,8 +588,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ssp); KEYWORD(sspreq); KEYWORD(sspstrong); - KEYWORD(thread_safety); - KEYWORD(uninitialized_checks); + KEYWORD(sanitize_address); + KEYWORD(sanitize_thread); + KEYWORD(sanitize_memory); KEYWORD(uwtable); KEYWORD(zeroext); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index bde18cd..c8da1f8 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -907,29 +907,29 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, B.addStackAlignmentAttr(Alignment); continue; } - case lltok::kw_address_safety: B.addAttribute(Attribute::AddressSafety); break; - case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; - case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; - case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; - case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; - case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; - case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; - case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; - case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; - case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; - case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; - case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; - case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; - case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; - case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; - case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; - case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; - case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; - case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; - case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; - case lltok::kw_thread_safety: B.addAttribute(Attribute::ThreadSafety); break; - case lltok::kw_uninitialized_checks: B.addAttribute(Attribute::UninitializedChecks); break; - case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; + case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; + case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; + case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; + case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; + case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; + case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; + case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; + case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; + case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; + case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; + case 
lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; + case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; + case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; + case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; + case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; + case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break; + case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break; + case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break; + case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; // Error handling. case lltok::kw_inreg: @@ -1159,17 +1159,17 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; - case lltok::kw_noreturn: case lltok::kw_nounwind: - case lltok::kw_uwtable: case lltok::kw_returns_twice: - case lltok::kw_noinline: case lltok::kw_readnone: - case lltok::kw_readonly: case lltok::kw_inlinehint: - case lltok::kw_alwaysinline: case lltok::kw_optsize: - case lltok::kw_ssp: case lltok::kw_sspreq: - case lltok::kw_noredzone: case lltok::kw_noimplicitfloat: - case lltok::kw_naked: case lltok::kw_nonlazybind: - case lltok::kw_address_safety: case lltok::kw_minsize: - case lltok::kw_alignstack: case lltok::kw_thread_safety: - case lltok::kw_nobuiltin: case lltok::kw_uninitialized_checks: + case lltok::kw_alignstack: case lltok::kw_nounwind: + case lltok::kw_alwaysinline: case lltok::kw_optsize: + case lltok::kw_inlinehint: case lltok::kw_readnone: + case lltok::kw_minsize: case lltok::kw_readonly: + case lltok::kw_naked: case lltok::kw_returns_twice: + case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: + case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory: + case lltok::kw_noinline: case lltok::kw_sanitize_thread: + case lltok::kw_nonlazybind: case lltok::kw_ssp: + case lltok::kw_noredzone: case lltok::kw_sspreq: + case lltok::kw_noreturn: case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -1200,19 +1200,19 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; - case lltok::kw_noreturn: case lltok::kw_nounwind: - case lltok::kw_uwtable: case lltok::kw_returns_twice: - case lltok::kw_noinline: case lltok::kw_readnone: - case lltok::kw_readonly: case lltok::kw_inlinehint: - case lltok::kw_alwaysinline: case lltok::kw_optsize: - case lltok::kw_ssp: case lltok::kw_sspreq: - case lltok::kw_sspstrong: case lltok::kw_noimplicitfloat: - case lltok::kw_noredzone: case lltok::kw_naked: - case lltok::kw_nonlazybind: case lltok::kw_address_safety: - case lltok::kw_minsize: case lltok::kw_alignstack: - case lltok::kw_align: case lltok::kw_noduplicate: - case lltok::kw_thread_safety: case lltok::kw_uninitialized_checks: - case lltok::kw_nobuiltin: + case lltok::kw_align: case lltok::kw_noreturn: + case lltok::kw_alignstack: case lltok::kw_nounwind: + case lltok::kw_alwaysinline: case lltok::kw_optsize: + case lltok::kw_inlinehint: case lltok::kw_readnone: + case lltok::kw_minsize: case lltok::kw_readonly: + case lltok::kw_naked: case 
lltok::kw_returns_twice: + case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: + case lltok::kw_noduplicate: case lltok::kw_sanitize_memory: + case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread: + case lltok::kw_noinline: case lltok::kw_ssp: + case lltok::kw_nonlazybind: case lltok::kw_sspreq: + case lltok::kw_noredzone: case lltok::kw_sspstrong: + case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index a51dada..cd25ba3 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -94,7 +94,7 @@ namespace lltok { // Attributes: kw_attributes, kw_alwaysinline, - kw_address_safety, + kw_sanitize_address, kw_byval, kw_inlinehint, kw_inreg, @@ -120,8 +120,8 @@ namespace lltok { kw_sspreq, kw_sspstrong, kw_sret, - kw_thread_safety, - kw_uninitialized_checks, + kw_sanitize_thread, + kw_sanitize_memory, kw_uwtable, kw_zeroext, diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 11ed82d..6eb51f0 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -153,8 +153,8 @@ unsigned Attribute::getStackAlignment() const { std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return ""; - if (hasAttribute(Attribute::AddressSafety)) - return "address_safety"; + if (hasAttribute(Attribute::SanitizeAddress)) + return "sanitize_address"; if (hasAttribute(Attribute::AlwaysInline)) return "alwaysinline"; if (hasAttribute(Attribute::ByVal)) @@ -207,10 +207,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "sspstrong"; if (hasAttribute(Attribute::StructRet)) return "sret"; - if (hasAttribute(Attribute::ThreadSafety)) - return "thread_safety"; - if (hasAttribute(Attribute::UninitializedChecks)) - return "uninitialized_checks"; + if (hasAttribute(Attribute::SanitizeThread)) + return "sanitize_thread"; + if (hasAttribute(Attribute::SanitizeMemory)) + return "sanitize_memory"; if (hasAttribute(Attribute::UWTable)) return "uwtable"; if (hasAttribute(Attribute::ZExt)) @@ -386,12 +386,12 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::ReturnsTwice: return 1 << 29; case Attribute::UWTable: return 1 << 30; case Attribute::NonLazyBind: return 1U << 31; - case Attribute::AddressSafety: return 1ULL << 32; + case Attribute::SanitizeAddress: return 1ULL << 32; case Attribute::MinSize: return 1ULL << 33; case Attribute::NoDuplicate: return 1ULL << 34; case Attribute::StackProtectStrong: return 1ULL << 35; - case Attribute::ThreadSafety: return 1ULL << 36; - case Attribute::UninitializedChecks: return 1ULL << 37; + case Attribute::SanitizeThread: return 1ULL << 36; + case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; } llvm_unreachable("Unsupported attribute type"); @@ -1119,9 +1119,9 @@ void AttrBuilder::removeFunctionOnlyAttrs() { .removeAttribute(Attribute::UWTable) .removeAttribute(Attribute::NonLazyBind) .removeAttribute(Attribute::ReturnsTwice) - .removeAttribute(Attribute::AddressSafety) - .removeAttribute(Attribute::ThreadSafety) - .removeAttribute(Attribute::UninitializedChecks) + .removeAttribute(Attribute::SanitizeAddress) + .removeAttribute(Attribute::SanitizeThread) + .removeAttribute(Attribute::SanitizeMemory) .removeAttribute(Attribute::MinSize) .removeAttribute(Attribute::NoDuplicate) .removeAttribute(Attribute::NoBuiltin); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 33e8ec6..8bfbb32 100644 --- 
a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -650,9 +650,9 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, !Attrs.hasAttribute(Idx, Attribute::UWTable) && !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) && !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) && - !Attrs.hasAttribute(Idx, Attribute::AddressSafety) && - !Attrs.hasAttribute(Idx, Attribute::ThreadSafety) && - !Attrs.hasAttribute(Idx, Attribute::UninitializedChecks) && + !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) && + !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) && + !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) && !Attrs.hasAttribute(Idx, Attribute::MinSize) && !Attrs.hasAttribute(Idx, Attribute::NoBuiltin), "Some attributes in '" + Attrs.getAsString(Idx) + diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f116657..6877475 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1098,11 +1098,11 @@ bool AddressSanitizer::runOnFunction(Function &F) { DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); - // If needed, insert __asan_init before checking for AddressSafety attr. + // If needed, insert __asan_init before checking for SanitizeAddress attr. maybeInsertAsanInitAtFunctionEntry(F); if (!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AddressSafety)) + Attribute::SanitizeAddress)) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) -- cgit v1.1 From 3a42989d3ddfe30c6b87fe1f3d7f1164e0868a40 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 26 Feb 2013 16:41:03 +0000 Subject: Fix PR15359. The PowerPC TLS relocation types were not previously added to the necessary list in MCELFStreamer::fixSymbolsInTLSFixups(). Now they are! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCELFStreamer.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index a9c35cc..c1428d8 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -334,6 +334,19 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_Mips_GOTTPREL: case MCSymbolRefExpr::VK_Mips_TPREL_HI: case MCSymbolRefExpr::VK_Mips_TPREL_LO: + case MCSymbolRefExpr::VK_PPC_TPREL16_HA: + case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + case MCSymbolRefExpr::VK_PPC_TLS: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + case MCSymbolRefExpr::VK_PPC_TLSGD: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + case MCSymbolRefExpr::VK_PPC_TLSLD: break; } MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol()); -- cgit v1.1 From a25b8d4c0b4f6422c58c0ca5c003757bfbf636ac Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:51:57 +0000 Subject: R600/SI: fix stupid typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a candidate for the mesa-stable branch. 
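The fix is a one-character diff that is easy to miss in review, so a condensed sketch of the affected mapping may help (reconstructed from the hunk below; only the float cases are shown): the copy-pasted 4.0f test made the -4.0f branch dead code, so -4.0f immediates fell through to the literal encoding:

    #include <cstdint>

    // SI encodes a few common float immediates inline in the instruction
    // word; 255 means "not an inline constant, emit a 32-bit literal".
    static uint32_t encodeInlineFloat(float F) {
      if (F == 4.0f)
        return 246;
      if (F == -4.0f) // before this fix: "== 4.0f" again, i.e. unreachable
        return 247;
      return 255;
    }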
Signed-off-by: Christian König
Reviewed-by: Tom Stellard

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176097 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 2bf8fb8..6cc0077 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -131,7 +131,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
   if (Imm.F == 4.0f)
     return 246;
 
-  if (Imm.F == 4.0f)
+  if (Imm.F == -4.0f)
     return 247;
 
   return 255;
--
cgit v1.1


From 6fd49bc89adb515ae389f34f18b3c52c13b579fa Mon Sep 17 00:00:00 2001
From: Christian Konig
Date: Tue, 26 Feb 2013 17:52:03 +0000
Subject: R600/SI: fix and cleanup SI register definition v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prevent producing real strange tablegen code by using proper register
sizes, alignments and hierarchy. Also cleanup the unused definitions
and add some comments.

v2: add SGPR 512 bit registers, stop registers from wrapping around,
    fix SGPR alignment

This is a candidate for the mesa-stable branch.

Signed-off-by: Christian König
Reviewed-by: Tom Stellard

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176098 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/SIInstructions.td |   5 +-
 lib/Target/R600/SIRegisterInfo.td | 227 ++++++++++++++++++++++----------------
 2 files changed, 135 insertions(+), 97 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 907cf49..9701d19 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -918,14 +918,15 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
 
 def S_CSELECT_B32 : SOP2 <
   0x0000000a, (outs SReg_32:$dst),
   (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
-  [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
+  [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
+                                    SReg_32:$src0, SReg_32:$src1))]
 >;
 
 def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
 
 // f32 pattern for S_CSELECT_B32
 def : Pat <
-  (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
+  (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
   (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
 >;
 
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index ab36b87..9e04e24 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -1,30 +1,40 @@
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the SI registers +//===----------------------------------------------------------------------===// class SIReg encoding = 0> : Register { let Namespace = "AMDGPU"; let HWEncoding = encoding; } -class SI_64 subregs, bits<16> encoding> : RegisterWithSubRegs { - let Namespace = "AMDGPU"; - let SubRegIndices = [sub0, sub1]; - let HWEncoding = encoding; -} - -class SGPR_32 num, string name> : SIReg; - -class VGPR_32 num, string name> : SIReg { - let HWEncoding{8} = 1; -} - // Special Registers def VCC : SIReg<"VCC", 106>; -def EXEC_LO : SIReg <"EXEC LO", 126>; -def EXEC_HI : SIReg <"EXEC HI", 127>; -def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>; +def EXEC : SIReg<"EXEC", 126>; def SCC : SIReg<"SCC", 253>; def M0 : SIReg <"M0", 124>; -//Interpolation registers +// SGPR registers +foreach Index = 0-101 in { + def SGPR#Index : SIReg <"SGPR"#Index, Index>; +} + +// VGPR registers +foreach Index = 0-255 in { + def VGPR#Index : SIReg <"VGPR"#Index, Index> { + let HWEncoding{8} = 1; + } +} + +// virtual Interpolation registers def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; @@ -50,102 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">; def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; -// SGPR 32-bit registers -foreach Index = 0-101 in { - def SGPR#Index : SGPR_32 ; -} +//===----------------------------------------------------------------------===// +// Groupings using register classes and tuples +//===----------------------------------------------------------------------===// +// SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers def SGPR_64 : RegisterTuples<[sub0, sub1], - [(add (decimate SGPR_32, 2)), - (add(decimate (rotl SGPR_32, 1), 2))]>; + [(add (decimate (trunc SGPR_32, 101), 2)), + (add (decimate (shl SGPR_32, 1), 2))]>; // SGPR 128-bit registers def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], - [(add (decimate SGPR_32, 4)), - (add (decimate (rotl SGPR_32, 1), 4)), - (add (decimate (rotl SGPR_32, 2), 4)), - (add (decimate (rotl SGPR_32, 3), 4))]>; + [(add (decimate (trunc SGPR_32, 99), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4))]>; // SGPR 256-bit registers def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], - [(add (decimate SGPR_32, 8)), - (add (decimate (rotl SGPR_32, 1), 8)), - (add (decimate (rotl SGPR_32, 2), 8)), - (add (decimate (rotl SGPR_32, 3), 8)), - (add (decimate (rotl SGPR_32, 4), 8)), - (add (decimate (rotl SGPR_32, 5), 8)), - (add (decimate (rotl SGPR_32, 6), 8)), - (add (decimate (rotl SGPR_32, 7), 8))]>; + [(add (decimate (trunc SGPR_32, 95), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + (add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4))]>; + +// SGPR 512-bit registers +def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], + [(add (decimate (trunc SGPR_32, 87), 4)), + (add 
(decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + (add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4)), + (add (decimate (shl SGPR_32, 8), 4)), + (add (decimate (shl SGPR_32, 9), 4)), + (add (decimate (shl SGPR_32, 10), 4)), + (add (decimate (shl SGPR_32, 11), 4)), + (add (decimate (shl SGPR_32, 12), 4)), + (add (decimate (shl SGPR_32, 13), 4)), + (add (decimate (shl SGPR_32, 14), 4)), + (add (decimate (shl SGPR_32, 15), 4))]>; // VGPR 32-bit registers -foreach Index = 0-255 in { - def VGPR#Index : VGPR_32 ; -} - def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers def VGPR_64 : RegisterTuples<[sub0, sub1], - [(add VGPR_32), - (add (rotl VGPR_32, 1))]>; + [(add (trunc VGPR_32, 255)), + (add (shl VGPR_32, 1))]>; // VGPR 128-bit registers def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3))]>; + [(add (trunc VGPR_32, 253)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3))]>; // VGPR 256-bit registers def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3)), - (add (rotl VGPR_32, 4)), - (add (rotl VGPR_32, 5)), - (add (rotl VGPR_32, 6)), - (add (rotl VGPR_32, 7))]>; + [(add (trunc VGPR_32, 249)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7))]>; // VGPR 512-bit registers def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3)), - (add (rotl VGPR_32, 4)), - (add (rotl VGPR_32, 5)), - (add (rotl VGPR_32, 6)), - (add (rotl VGPR_32, 7)), - (add (rotl VGPR_32, 8)), - (add (rotl VGPR_32, 9)), - (add (rotl VGPR_32, 10)), - (add (rotl VGPR_32, 11)), - (add (rotl VGPR_32, 12)), - (add (rotl VGPR_32, 13)), - (add (rotl VGPR_32, 14)), - (add (rotl VGPR_32, 15))]>; + [(add (trunc VGPR_32, 241)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7)), + (add (shl VGPR_32, 8)), + (add (shl VGPR_32, 9)), + (add (shl VGPR_32, 10)), + (add (shl VGPR_32, 11)), + (add (shl VGPR_32, 12)), + (add (shl VGPR_32, 13)), + (add (shl VGPR_32, 14)), + (add (shl VGPR_32, 15))]>; + +//===----------------------------------------------------------------------===// +// Register classes used as source and destination +//===----------------------------------------------------------------------===// + +// Special register classes for predicates and the M0 register +def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>; +def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>; +def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>; +def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add SGPR_32, M0, EXEC_LO, EXEC_HI) + (add SGPR_32, M0Reg) >; -def SReg_64 : 
RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>; +def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, + (add SGPR_64, VCCReg, EXECReg) +>; def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; +def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>; + // Register class for all vector registers (VGPRs + Interpolation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, - (add VGPR_32, +def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; + +def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; + +def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; + +def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; + +def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; + +//===----------------------------------------------------------------------===// +// [SV]Src_* register classes, can have either an immediate or a register +//===----------------------------------------------------------------------===// + +def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; + +def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; + +def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, + (add VReg_32, SReg_32, PERSP_SAMPLE_I, PERSP_SAMPLE_J, PERSP_CENTER_I, PERSP_CENTER_J, PERSP_CENTROID_I, PERSP_CENTROID_J, @@ -162,29 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, ANCILLARY, SAMPLE_COVERAGE, POS_FIXED_PT - ) + ) >; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; - -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; - -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; - -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; - -// [SV]Src_* operands can have either an immediate or an register -def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; - -def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>; - -def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; - -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>; - -// Special register classes for predicates and the M0 register -def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; -def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; -def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; -def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; -- cgit v1.1 From 749428f852b63a98e872ba69b0c576b26b7b7518 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:09 +0000 Subject: R600/SI: fix VOP3b encoding v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: document why we hardcode VCC for now. This is a candidate for the mesa-stable branch. 
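As an illustration, not part of the patch: the point of the VOP3b format is that the bits which carry the ABS/CLAMP modifiers in a plain VOP3 word instead name SDST, the scalar register that receives the carry-out. A hedged C++ sketch of how such a 64-bit word could be assembled; the field offsets and widths follow the public SI ISA documentation and should be read as assumptions, not as the authoritative TableGen definition:

#include <cstdint>

// Illustrative VOP3b encoder; field positions are assumptions taken from the
// SI ISA docs, not from the definitions this patch touches.
static uint64_t encodeVOP3b(uint64_t Op, uint64_t VDst, uint64_t SDst,
                            uint64_t Src0, uint64_t Src1, uint64_t Src2,
                            uint64_t OMod, uint64_t Neg) {
  uint64_t Inst = 0;
  Inst |= (VDst & 0xFF);         // [7:0]   vector destination
  Inst |= (SDst & 0x7F) << 8;    // [14:8]  carry-out SGPR (pinned to VCC here)
  Inst |= (Op & 0x1FF) << 17;    // [25:17] opcode
  Inst |= UINT64_C(0x34) << 26;  // [31:26] VOP3 encoding bits (110100b)
  Inst |= (Src0 & 0x1FF) << 32;  // [40:32] source 0
  Inst |= (Src1 & 0x1FF) << 41;  // [49:41] source 1
  Inst |= (Src2 & 0x1FF) << 50;  // [58:50] source 2
  Inst |= (OMod & 0x3) << 59;    // [60:59] output modifier
  Inst |= (Neg & 0x7) << 61;     // [63:61] source negation
  return Inst;
}

The VOP2b_32 multiclass in the diff below pins SDST to VCC precisely because the carry-out is currently unused.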
Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176099 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 24 ++++++++++++++++++++++++ lib/Target/R600/SIInstructions.td | 14 ++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 99168ce..d68fbff 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -51,6 +51,7 @@ class InlineImm : ImmLeaf : Operand { @@ -195,6 +196,29 @@ multiclass VOP2_32 op, string opName, list pattern> multiclass VOP2_64 op, string opName, list pattern> : VOP2_Helper ; +multiclass VOP2b_32 op, string opName, list pattern> { + + def _e32 : VOP2 < + op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), + opName#"_e32 $dst, $src0, $src1", pattern + >; + + def _e64 : VOP3b < + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs VReg_32:$dst), + (ins VSrc_32:$src0, VReg_32:$src1, + i32imm:$abs, i32imm:$clamp, + i32imm:$omod, i32imm:$neg), + opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] + > { + let SRC2 = SIOperand.ZERO; + /* the VOP2 variant puts the carry out into VCC, the VOP3 variant + can write it into any SGPR. We currently don't use the carry out, + so for now hardcode it to VCC as well */ + let SDST = SIOperand.VCC; + } +} + multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, string opName, ValueType vt, PatLeaf cond> { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9701d19..f999025 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -805,17 +805,19 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let Defs = [VCC] in { // Carry-out goes to VCC -defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; -defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; +let Uses = [VCC] in { // Carry-out comes from VCC +defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; +defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; +} // End Uses = [VCC] } // End Defs = [VCC] -defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; -defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>; -defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>; defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; -- cgit v1.1 From c018ecac2f2f475b6e1023e90d0e48fcf9bd6e1d Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:16 +0000 Subject: R600/SI: add folding helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: 
https://llvm.org/svn/llvm-project/llvm/trunk@176100 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.h | 4 ++++ lib/Target/R600/AMDILISelDAGToDAG.cpp | 19 +++++++++++++++++++ lib/Target/R600/SIISelLowering.cpp | 6 ++++++ lib/Target/R600/SIISelLowering.h | 1 + 4 files changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 404f620..9e7d997 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -65,6 +65,10 @@ public: SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; virtual const char* getTargetNodeName(unsigned Opcode) const; + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const { + return N; + } + // Functions defined in AMDILISelLowering.cpp public: diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 2e726e9..2f70fa9 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -43,6 +43,7 @@ public: SDNode *Select(SDNode *N); virtual const char *getPassName() const; + virtual void PostprocessISelDAG(); private: inline SDValue getSmallIPtrImm(unsigned Imm); @@ -575,3 +576,21 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } + +void AMDGPUDAGToDAGISel::PostprocessISelDAG() { + + // Go over all selected nodes and try to fold them a bit more + const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + + MachineSDNode *Node = dyn_cast(I); + if (!Node) + continue; + + SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG); + if (ResNode != Node) + ReplaceUses(Node, ResNode); + } +} + diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 212e3f2..13173e8 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -357,3 +357,9 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, } return SDValue(); } + +SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, + SelectionDAG &DAG) const { + // TODO: Implement immediate folding + return Node; +} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 5d048f8..71f99ac 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -41,6 +41,7 @@ public: virtual EVT getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; }; } // End namespace llvm -- cgit v1.1 From d3b5509b8099b72104bd8a0d9a998a69eb56ab2a Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:23 +0000 Subject: R600/SI: add post ISel folding for SI v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include immediate folding and SGPR limit handling for VOP3 instructions. 
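For context, an illustration rather than part of the patch: SI distinguishes inline immediates, which are encoded for free in a source operand field, from 32-bit literal immediates, of which an instruction can carry at most one. The folding below is about packing as many constants as those two budgets allow. A minimal standalone sketch of the classification, mirroring the analyzeImmediate() logic added in the diff:

#include <cstdint>

// A constant is "inline" if its bit pattern, read as i32, lies in [-16, 64],
// or, read as f32, is one of +/-0.5, +/-1.0, +/-2.0, +/-4.0. Anything else
// is a literal dword following the instruction, and there is room for one.
union ImmBits { int32_t I; float F; };

static bool isInlineImmediate(ImmBits Imm) {
  return (Imm.I >= -16 && Imm.I <= 64) ||
         Imm.F == 0.5f || Imm.F == -0.5f ||
         Imm.F == 1.0f || Imm.F == -1.0f ||
         Imm.F == 2.0f || Imm.F == -2.0f ||
         Imm.F == 4.0f || Imm.F == -4.0f;
}

This is why foldImm() below tracks a single scalar slot: once a literal has claimed it, only inline immediates, or that same literal value, can still be folded.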
v2: remove leftover hasExtraSrcRegAllocReq Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176101 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 1 + lib/Target/R600/SIISelLowering.cpp | 205 +++++++++++++++++++++++++++++++++- lib/Target/R600/SIISelLowering.h | 9 ++ lib/Target/R600/SIInstrInfo.td | 17 ++- lib/Target/R600/SIInstructions.td | 22 ++-- 5 files changed, 231 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 2f70fa9..e77b9dc 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -16,6 +16,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDILDevices.h" #include "R600InstrInfo.h" +#include "SIISelLowering.h" #include "llvm/ADT/ValueMap.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 13173e8..bf1f3bf 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -26,7 +26,8 @@ using namespace llvm; SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM), - TII(static_cast(TM.getInstrInfo())) { + TII(static_cast(TM.getInstrInfo())), + TRI(TM.getRegisterInfo()) { addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); @@ -358,8 +359,206 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +/// \brief Test if RegClass is one of the VSrc classes +static bool isVSrc(unsigned RegClass) { + return AMDGPU::VSrc_32RegClassID == RegClass || + AMDGPU::VSrc_64RegClassID == RegClass; +} + +/// \brief Test if RegClass is one of the SSrc classes +static bool isSSrc(unsigned RegClass) { + return AMDGPU::SSrc_32RegClassID == RegClass || + AMDGPU::SSrc_64RegClassID == RegClass; +} + +/// \brief Analyze the possible immediate value Op +/// +/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate, +/// and the immediate value if it's a literal immediate +int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { + + union { + int32_t I; + float F; + } Imm; + + if (const ConstantSDNode *Node = dyn_cast(N)) + Imm.I = Node->getSExtValue(); + else if (const ConstantFPSDNode *Node = dyn_cast(N)) + Imm.F = Node->getValueAPF().convertToFloat(); + else + return -1; // It isn't an immediate + + if ((Imm.I >= -16 && Imm.I <= 64) || + Imm.F == 0.5f || Imm.F == -0.5f || + Imm.F == 1.0f || Imm.F == -1.0f || + Imm.F == 2.0f || Imm.F == -2.0f || + Imm.F == 4.0f || Imm.F == -4.0f) + return 0; // It's an inline immediate + + return Imm.I; // It's a literal immediate +} + +/// \brief Try to fold an immediate directly into an instruction +bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate, + bool &ScalarSlotUsed) const { + + MachineSDNode *Mov = dyn_cast(Operand); + if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode())) + return false; + + const SDValue &Op = Mov->getOperand(0); + int32_t Value = analyzeImmediate(Op.getNode()); + if (Value == -1) { + // Not an immediate at all + return false; + + } else if (Value == 0) { + // Inline immediates can always be folded + Operand = Op; + return true; + + } else if (Value == Immediate) { + // Already folded literal immediate + Operand = Op; + return true; + + } else if (!ScalarSlotUsed && 
!Immediate) { + // Fold this literal immediate + ScalarSlotUsed = true; + Immediate = Value; + Operand = Op; + return true; + + } + + return false; +} + +/// \brief Does "Op" fit into register class "RegClass" ? +bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op, + unsigned RegClass) const { + + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + SDNode *Node = Op.getNode(); + + int OpClass; + if (MachineSDNode *MN = dyn_cast(Node)) { + const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode()); + OpClass = Desc.OpInfo[Op.getResNo()].RegClass; + + } else if (Node->getOpcode() == ISD::CopyFromReg) { + RegisterSDNode *Reg = cast(Node->getOperand(1).getNode()); + OpClass = MRI.getRegClass(Reg->getReg())->getID(); + + } else + return false; + + if (OpClass == -1) + return false; + + return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass)); +} + +/// \brief Make sure that we don't exceed the number of allowed scalars +void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, + unsigned RegClass, + bool &ScalarSlotUsed) const { + + // First map the operands register class to a destination class + if (RegClass == AMDGPU::VSrc_32RegClassID) + RegClass = AMDGPU::VReg_32RegClassID; + else if (RegClass == AMDGPU::VSrc_64RegClassID) + RegClass = AMDGPU::VReg_64RegClassID; + else + return; + + // Nothing to do if they fit naturally + if (fitsRegClass(DAG, Operand, RegClass)) + return; + + // If the scalar slot isn't used yet use it now + if (!ScalarSlotUsed) { + ScalarSlotUsed = true; + return; + } + + // This is a conservative approach; it is possible that we can't determine + // the correct register class and copy too often, but better safe than sorry. + SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32); + SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(), + Operand.getValueType(), Operand, RC); + Operand = SDValue(Node, 0); +} + SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { - - // TODO: Implement immediate folding - return Node; + + // Original encoding (either e32 or e64) + int Opcode = Node->getMachineOpcode(); + const MCInstrDesc *Desc = &TII->get(Opcode); + + unsigned NumDefs = Desc->getNumDefs(); + unsigned NumOps = Desc->getNumOperands(); + + int32_t Immediate = Desc->getSize() == 4 ? 0 : -1; + bool HaveVSrc = false, HaveSSrc = false; + + // First figure out what we already have in this instruction + for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; + i != e && Op < NumOps; ++i, ++Op) { + + unsigned RegClass = Desc->OpInfo[Op].RegClass; + if (isVSrc(RegClass)) + HaveVSrc = true; + else if (isSSrc(RegClass)) + HaveSSrc = true; + else + continue; + + int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode()); + if (Imm != -1 && Imm != 0) { + // Literal immediate + Immediate = Imm; + } + } + + // If we neither have VSrc nor SSrc it makes no sense to continue + if (!HaveVSrc && !HaveSSrc) + return Node; + + // No scalar allowed when we have both VSrc and SSrc + bool ScalarSlotUsed = HaveVSrc && HaveSSrc; + + // Second go over the operands and try to fold them + std::vector Ops; + for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; + i != e && Op < NumOps; ++i, ++Op) { + + const SDValue &Operand = Node->getOperand(i); + Ops.push_back(Operand); + + // Already folded immediate ? + if (isa(Operand.getNode()) || + isa(Operand.getNode())) + continue; + + // Is this a VSrc or SSrc operand ? 
+ unsigned RegClass = Desc->OpInfo[Op].RegClass; + if (!isVSrc(RegClass) && !isSSrc(RegClass)) + continue; + + // Try to fold the immediates + if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { + // Folding didn't work, make sure we don't hit the SReg limit + ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); + } + } + + // Add optional chain and glue + for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) + Ops.push_back(Node->getOperand(i)); + + // Update the instruction parameters + return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 71f99ac..737162f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -22,6 +22,7 @@ namespace llvm { class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; + const TargetRegisterInfo * TRI; void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, unsigned Opcode) const; @@ -34,6 +35,12 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + bool foldImm(SDValue &Operand, int32_t &Immediate, + bool &ScalarSlotUsed) const; + bool fitsRegClass(SelectionDAG &DAG, SDValue &Op, unsigned RegClass) const; + void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, + unsigned RegClass, bool &ScalarSlotUsed) const; + public: SITargetLowering(TargetMachine &tm); virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, @@ -42,6 +49,8 @@ public: virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; + + int32_t analyzeImmediate(const SDNode *N) const; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d68fbff..3a617b4 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -40,11 +40,10 @@ def IMM12bit : ImmLeaf < [{return isUInt<12>(Imm);}] >; -class InlineImm : ImmLeaf : PatLeaf <(vt imm), [{ + return ((const SITargetLowering &)TLI).analyzeImmediate(N) == 0; }]>; - //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// @@ -181,7 +180,7 @@ multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs vrc:$dst), - (ins arc:$src0, vrc:$src1, + (ins arc:$src0, arc:$src1, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] @@ -206,7 +205,7 @@ multiclass VOP2b_32 op, string opName, list pattern> { def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs VReg_32:$dst), - (ins VSrc_32:$src0, VReg_32:$src1, + (ins VSrc_32:$src0, VSrc_32:$src1, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] @@ -230,12 +229,12 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs SReg_64:$dst), - (ins arc:$src0, vrc:$src1, + (ins arc:$src0, arc:$src1, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), opName#"_e64 
$dst, $src0, $src1, $abs, $clamp, $omod, $neg", !if(!eq(!cast(cond), "COND_NULL"), [], - [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), vrc:$src1, cond)))] + [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] ) > { let SRC2 = SIOperand.ZERO; @@ -252,14 +251,14 @@ multiclass VOPC_64 op, string opName, class VOP3_32 op, string opName, list pattern> : VOP3 < op, (outs VReg_32:$dst), - (ins VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, + (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >; class VOP3_64 op, string opName, list pattern> : VOP3 < op, (outs VReg_64:$dst), - (ins VSrc_64:$src0, VReg_64:$src1, VReg_64:$src2, + (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index f999025..822be18 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -732,17 +732,17 @@ def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), } def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, SReg_64:$src2, + (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", - [(set (i32 VReg_32:$dst), (select (i1 SReg_64:$src2), - VReg_32:$src1, VReg_32:$src0))] + [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2), + VSrc_32:$src1, VSrc_32:$src0))] >; //f32 pattern for V_CNDMASK_B32_e64 def : Pat < - (f32 (select (i1 SReg_64:$src2), VReg_32:$src1, VReg_32:$src0)), - (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_64:$src2) + (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)), + (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2) >; defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; @@ -895,7 +895,7 @@ def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; def : Pat < (mul VSrc_32:$src0, VReg_32:$src1), - (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 SIOperand.ZERO), 0, 0, 0, 0) + (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) >; def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; @@ -1219,19 +1219,19 @@ def : BitConvert ; def : Pat < (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)), - (V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */), + (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; def : Pat < (fabs VReg_32:$src), - (V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */), + (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; def : Pat < (fneg VReg_32:$src), - (V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */), + (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) >; @@ -1394,8 +1394,8 @@ def : Pat < /********** VOP3 Patterns **********/ /********** ================== **********/ -def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VReg_32:$src1), VReg_32:$src2)), - (V_MAD_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2, +def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), 
VSrc_32:$src2)), + (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, 0, 0, 0, 0)>; /********** ================== **********/ -- cgit v1.1 From b3d1eaded7d7a874bbda2b0d322df7389c724bfc Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:29 +0000 Subject: R600/SI: add some more instruction flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176102 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.cpp | 10 +++++ lib/Target/R600/SIInstrInfo.h | 3 ++ lib/Target/R600/SIInstructions.td | 93 +++++++++++++++++++++++++++++++++------ 3 files changed, 92 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 4dfd26e..d9dbd6a 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -66,6 +66,16 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } +MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI) const { + + if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() || + !MI->getOperand(2).isReg()) + return 0; + + return TargetInstrInfo::commuteInstruction(MI, NewMI); +} + MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const { MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc()); diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index a65f7b6..015cfb3 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,6 +35,9 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + virtual MachineInstr *commuteInstruction(MachineInstr *MI, + bool NewMI=false) const; + virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 822be18..af116f0 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -28,10 +28,14 @@ def isSI : Predicate<"Subtarget.device()" let Predicates = [isSI] in { let neverHasSideEffects = 1 in { + +let isMoveImm = 1 in { def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>; def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>; def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; +} // End isMoveImm = 1 + def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; @@ -39,6 +43,7 @@ def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; } // End neverHasSideEffects = 1 + ////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; ////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; ////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; @@ -107,6 +112,7 @@ def S_CMPK_EQ_I32 : SOPK < >; */ +let isCompare = 1 in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; @@ -118,6 +124,8 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, 
"S_CMPK_GE_U32", []>; def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; +} // End isCompare = 1 + def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; //def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; @@ -127,6 +135,8 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; //def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; //def EXP : EXP_ <0x00000000, "EXP", []>; +let isCompare = 1 in { + defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">; defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>; defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>; @@ -144,8 +154,7 @@ defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>; defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; -//Side effect is writing to EXEC -let hasSideEffects = 1 in { +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">; defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">; @@ -164,7 +173,7 @@ defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">; defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">; defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; -} // End hasSideEffects = 1 +} // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">; @@ -183,8 +192,7 @@ defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">; defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; -//Side effect is writing to EXEC -let hasSideEffects = 1 in { +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">; defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">; @@ -203,7 +211,7 @@ defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">; defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">; defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">; -} // End hasSideEffects = 1 +} // End hasSideEffects = 1, Defs = [EXEC] defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">; defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">; @@ -221,6 +229,9 @@ defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">; defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">; defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">; defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">; + +let hasSideEffects = 1, Defs = [EXEC] in { + defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">; defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">; defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">; @@ -237,6 +248,9 @@ defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">; defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">; defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">; defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">; + +} // End hasSideEffects = 1, Defs = [EXEC] + defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">; defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">; defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">; @@ -253,6 +267,9 @@ defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">; defm V_CMPS_NEQ_F64 : 
VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">; defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">; defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">; + +let hasSideEffects = 1, Defs = [EXEC] in { + defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">; defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">; defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">; @@ -269,6 +286,9 @@ defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">; defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">; defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">; defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">; + +} // End hasSideEffects = 1, Defs = [EXEC] + defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">; defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>; defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>; @@ -278,7 +298,7 @@ defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>; defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; -let hasSideEffects = 1 in { +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">; defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">; @@ -289,7 +309,7 @@ defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">; defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">; defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; -} // End hasSideEffects +} // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">; @@ -300,7 +320,7 @@ defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">; defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">; defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; -let hasSideEffects = 1 in { +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">; defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">; @@ -311,7 +331,7 @@ defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">; defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">; defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">; -} // End hasSideEffects +} // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">; @@ -322,7 +342,7 @@ defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">; defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">; defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; -let hasSideEffects = 1 in { +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">; defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">; @@ -333,7 +353,7 @@ defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">; defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">; defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; -} // End hasSideEffects +} // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">; @@ -343,6 +363,9 @@ defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">; defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">; defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">; defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; + +let hasSideEffects = 1, Defs = [EXEC] in { + defm 
V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">; defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">; defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">; @@ -351,10 +374,23 @@ defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">; defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">; defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">; defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">; + +} // End hasSideEffects = 1, Defs = [EXEC] + defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">; + +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">; +} // End hasSideEffects = 1, Defs = [EXEC] + defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">; + +let hasSideEffects = 1, Defs = [EXEC] in { defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; +} // End hasSideEffects = 1, Defs = [EXEC] + +} // End isCompare = 1 + //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; @@ -535,9 +571,11 @@ def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; //def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; -let neverHasSideEffects = 1 in { + +let neverHasSideEffects = 1, isMoveImm = 1 in { defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; -} // End neverHasSideEffects +} // End neverHasSideEffects = 1, isMoveImm = 1 + defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; @@ -748,15 +786,21 @@ def : Pat < defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; +let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; +} // End isCommutable = 1 + defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; + +let isCommutable = 1 in { + defm V_MUL_LEGACY_F32 : VOP2_32 < 0x00000007, "V_MUL_LEGACY_F32", [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))] @@ -765,10 +809,16 @@ defm V_MUL_LEGACY_F32 : VOP2_32 < defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))] >; + +} // End isCommutable = 1 + //defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; //defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; + +let isCommutable = 1 in { + defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))] >; @@ -776,18 +826,25 @@ defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))] >; + defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; defm 
V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; + +} // End isCommutable = 1 + defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; + +let isCommutable = 1 in { + defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] >; @@ -797,6 +854,9 @@ defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))] >; + +} // End isCommutable = 1 + defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; @@ -805,12 +865,17 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let Defs = [VCC] in { // Carry-out goes to VCC + +let isCommutable = 1 in { defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; +} // End isCommutable = 1 + defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; + defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; let Uses = [VCC] in { // Carry-out comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; -- cgit v1.1 From b37afdcf3fa41596ab1f70eef915a8ade68ccc24 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:36 +0000 Subject: R600/SI: swap operands if it helps folding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176103 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index bf1f3bf..da30c07 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -545,8 +545,22 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, // Is this a VSrc or SSrc operand ? 
unsigned RegClass = Desc->OpInfo[Op].RegClass; - if (!isVSrc(RegClass) && !isSSrc(RegClass)) + if (!isVSrc(RegClass) && !isSSrc(RegClass)) { + + if (i == 1 && Desc->isCommutable() && + fitsRegClass(DAG, Ops[0], RegClass) && + foldImm(Ops[1], Immediate, ScalarSlotUsed)) { + + assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) || + isSSrc(Desc->OpInfo[NumDefs].RegClass)); + + // Swap commutable operands + SDValue Tmp = Ops[1]; + Ops[1] = Ops[0]; + Ops[0] = Tmp; + } continue; + } // Try to fold the immediates if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { -- cgit v1.1 From f767018b1048f228b0c2a71d7e4008750aff0ef5 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:42 +0000 Subject: R600/SI: add VOP mapping functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to map between e32 and e64 encoding opcodes. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstrInfo.cpp | 1 + lib/Target/R600/SIInstrInfo.h | 6 ++++++ lib/Target/R600/SIInstrInfo.td | 39 ++++++++++++++++++++++++++----------- 3 files changed, 35 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 640707d..30f736c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #define GET_INSTRINFO_CTOR +#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" using namespace llvm; diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 015cfb3..5789af5 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -73,6 +73,12 @@ public: virtual const TargetRegisterClass *getSuperIndirectRegClass() const; }; +namespace AMDGPU { + + int getVOPe64(uint16_t Opcode); + +} // End namespace AMDGPU + } // End namespace llvm namespace SIInstrFlags { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 3a617b4..d6c3f06 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -143,13 +143,17 @@ multiclass SMRD_Helper op, string asm, RegisterClass dstClass> { // Vector ALU classes //===----------------------------------------------------------------------===// +class VOP { + string OpName = opName; +} + multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src, string opName, list pattern> { - def _e32: VOP1 < + def _e32 : VOP1 < op, (outs drc:$dst), (ins src:$src0), opName#"_e32 $dst, $src0", pattern - >; + >, VOP ; def _e64 : VOP3 < {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -158,7 +162,7 @@ multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", [] - > { + >, VOP { let SRC1 = SIOperand.ZERO; let SRC2 = SIOperand.ZERO; } @@ -175,7 +179,7 @@ multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, def _e32 : VOP2 < op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >; + >, VOP ; def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -184,7 +188,7 @@ multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - > { + 
>, VOP { let SRC2 = SIOperand.ZERO; } } @@ -200,7 +204,7 @@ multiclass VOP2b_32 op, string opName, list pattern> { def _e32 : VOP2 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >; + >, VOP ; def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -209,7 +213,7 @@ multiclass VOP2b_32 op, string opName, list pattern> { i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - > { + >, VOP { let SRC2 = SIOperand.ZERO; /* the VOP2 variant puts the carry out into VCC, the VOP3 variant can write it into any SGPR. We currently don't use the carry out, @@ -224,7 +228,7 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, def _e32 : VOPC < op, (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", [] - >; + >, VOP ; def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -236,7 +240,7 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, !if(!eq(!cast(cond), "COND_NULL"), [], [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] ) - > { + >, VOP { let SRC2 = SIOperand.ZERO; } } @@ -254,14 +258,14 @@ class VOP3_32 op, string opName, list pattern> : VOP3 < (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern ->; +>, VOP ; class VOP3_64 op, string opName, list pattern> : VOP3 < op, (outs VReg_64:$dst), (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern ->; +>, VOP ; //===----------------------------------------------------------------------===// // Vector I/O classes @@ -319,4 +323,17 @@ class MIMG_Load_Helper op, string asm> : MIMG < let mayStore = 0; } +//===----------------------------------------------------------------------===// +// Vector instruction mappings +//===----------------------------------------------------------------------===// + +// Maps an opcode in e32 form to its e64 equivalent +def getVOPe64 : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["Size"]; + let KeyCol = ["4"]; + let ValueCols = [["8"]]; +} + include "SIInstructions.td" -- cgit v1.1 From 3c980d1632fa0a1cef065e558fbc96d83ebbdf40 Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:47 +0000 Subject: R600/SI: Add promotion of e32 to e64 in operand folding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176105 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index da30c07..0a0fbd9 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -501,6 +501,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, unsigned NumDefs = Desc->getNumDefs(); unsigned NumOps = Desc->getNumOperands(); + // e64 version if available, -1 otherwise + int OpcodeE64 = AMDGPU::getVOPe64(Opcode); + const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 
0 : &TII->get(OpcodeE64); + + assert(!DescE64 || DescE64->getNumDefs() == NumDefs); + assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4)); + int32_t Immediate = Desc->getSize() == 4 ? 0 : -1; bool HaveVSrc = false, HaveSSrc = false; @@ -532,6 +539,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, // Second go over the operands and try to fold them std::vector Ops; + bool Promote2e64 = false; for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; i != e && Op < NumOps; ++i, ++Op) { @@ -558,6 +566,20 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SDValue Tmp = Ops[1]; Ops[1] = Ops[0]; Ops[0] = Tmp; + + } else if (DescE64 && !Immediate) { + // Test if it makes sense to switch to e64 encoding + + RegClass = DescE64->OpInfo[Op].RegClass; + int32_t TmpImm = -1; + if ((isVSrc(RegClass) || isSSrc(RegClass)) && + foldImm(Ops[i], TmpImm, ScalarSlotUsed)) { + + Immediate = -1; + Promote2e64 = true; + Desc = DescE64; + DescE64 = 0; + } } continue; } @@ -569,10 +591,20 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, } } + if (Promote2e64) { + // Add the modifier flags while promoting + for (unsigned i = 0; i < 4; ++i) + Ops.push_back(DAG.getTargetConstant(0, MVT::i32)); + } + // Add optional chain and glue for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) Ops.push_back(Node->getOperand(i)); - // Update the instruction parameters - return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + // Either create a completely new instruction or update the current one + if (Promote2e64) + return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), + Node->getVTList(), Ops.data(), Ops.size()); + else + return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); } -- cgit v1.1 From d06c9cb9bb0ad682d097f704f8ed97b0fb657670 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 26 Feb 2013 20:13:09 +0000 Subject: Fix typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176117 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Archive/ArchiveReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index efe1180..a48bdd4 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -177,7 +177,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) } if (p >= endp) { if (error) - *error = "missing name termiantor in string table"; + *error = "missing name terminator in string table"; return 0; } } else { -- cgit v1.1 From ac67b50fcfaab20829b4bce32cfdce77507f6c72 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 26 Feb 2013 20:17:10 +0000 Subject: AsmParser: More generic support for integer type suffixes. For integer constants, allow 'L', 'UL' as well as 'ULL' and 'LL'. This provides better support for shared headers between .s and .c files that define bunches of constant values. 
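To make the use case concrete, here is a hypothetical shared header (all names invented for illustration) that both a C compiler and, after this change, the integrated assembler can consume:

/* consts.h - hypothetical shared header, included from .c and .s files.
 * The C-style suffixes below were previously rejected by the assembler;
 * with this change they are accepted and ignored. */
#define PAGE_SIZE  4096UL
#define PAGE_MASK  0xFFFFF000UL
#define MAX_ORDER  11L

So, for example, a line like "movq $PAGE_SIZE, %rax" in a preprocessed .s file should now assemble, since the lexer skips the trailing 'UL'.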
rdar://9321056 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176118 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 86a9674..c1c594a 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -156,10 +156,13 @@ AsmToken AsmLexer::LexLineComment() { } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { - if (CurPtr[0] == 'L' && CurPtr[1] == 'L') - CurPtr += 2; - if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L') - CurPtr += 3; + // Skip ULL, UL, U, L and LL suffixes. + if (CurPtr[0] == 'U') + ++CurPtr; + if (CurPtr[0] == 'L') + ++CurPtr; + if (CurPtr[0] == 'L') + ++CurPtr; } // Look ahead to search for first non-hex digit, if it's [hH], then we treat the @@ -220,8 +223,8 @@ AsmToken AsmLexer::LexDigit() { if (Radix == 2 || Radix == 16) ++CurPtr; - // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL - // suffixes on integer literals. + // The darwin/x86 (and x86-64) assembler accepts and ignores type + // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); return AsmToken(AsmToken::Integer, Result, Value); -- cgit v1.1 From 6fe26103d71263124e3cc56c0a349cb1391208a1 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Tue, 26 Feb 2013 20:48:29 +0000 Subject: Debug Info: for static member variables, move AT_MIPS_linkage_name from TAG_member inside a class to the specification DIE. Having AT_MIPS_linkage_name on TAG_member caused old gdb (GNU 6.3.50) to error out. Also gcc 4.7 has AT_MIPS_linkage_name on the specification DIE. rdar://problem/13291234 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176120 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 93b00fb..c023afd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1349,7 +1349,10 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Add linkage name. StringRef LinkageName = GV.getLinkageName(); if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + // For static member variables, AT_MIPS_linkage_name should be on + // specification DIE to make old gdb work. + addString(IsStaticMember && VariableSpecDIE ? 
From 65396823305326bcb379e88a95ded318e1da875c Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Tue, 26 Feb 2013 21:28:57 +0000
Subject: Fix PR15332 (patch by Florian Zeitz).

There's no need to generate a stack frame for PPC32 SVR4 when there are
no local variables assigned to the stack, i.e., when no red zone is needed.
(PPC64 supports a red zone, but PPC32 does not.)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176124 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCFrameLowering.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 6dfb4c8..0a396e6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -207,13 +207,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
   // to adjust the stack pointer (we fit in the Red Zone). For 64-bit
   // SVR4, we also require a stack frame if we need to spill the CR,
   // since this spill area is addressed relative to the stack pointer.
+  // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+  // stackless code if all local vars are reg-allocated.
   bool DisableRedZone = MF.getFunction()->getAttributes().
     hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
-  // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
-  // still generate stackless code if all local vars are reg-allocated.
-  // Try: (FrameSize <= 224
-  //       || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
   if (!DisableRedZone &&
+      (Subtarget.isPPC64() ||    // 32-bit SVR4, no stack-
+       !Subtarget.isSVR4ABI() || //   allocated locals.
+       FrameSize == 0) &&
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
       !MFI->adjustsStack() &&                      // No calls.
-- cgit v1.1


From f15fe8195b0a42d0e950f3694c4d6ccd4034804a Mon Sep 17 00:00:00 2001
From: "Michael J. Spencer"
Date: Tue, 26 Feb 2013 21:29:47 +0000
Subject: [TableGen] Fix ICE on MSVC 2012 Release builds.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176125 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGParser.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index da0086a..c4b48fe 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -382,7 +382,8 @@ static bool isObjectStart(tgtok::TokKind K) {

 static std::string GetNewAnonymousName() {
   static unsigned AnonCounter = 0;
-  return "anonymous."+utostr(AnonCounter++);
+  unsigned Tmp = AnonCounter++; // MSVC2012 ICEs without this.
+  return "anonymous." + utostr(Tmp);
 }

 /// ParseObjectName - If an object name is specified, return it. Otherwise,
-- cgit v1.1
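The workaround above is generic: hoist the post-increment out of the concatenation expression so the compiler never has to sequence the increment inline. A self-contained sketch of the same pattern; utostrSketch is a hypothetical stand-in for llvm::utostr.

#include <cstdio>
#include <string>

// Hypothetical stand-in for llvm::utostr.
static std::string utostrSketch(unsigned V) {
  char Buf[16];
  snprintf(Buf, sizeof(Buf), "%u", V);
  return Buf;
}

static std::string GetNewAnonymousName() {
  static unsigned AnonCounter = 0;
  unsigned Tmp = AnonCounter++; // hoisted: the inline increment ICEd MSVC 2012
  return "anonymous." + utostrSketch(Tmp);
}

int main() {
  printf("%s\n", GetNewAnonymousName().c_str()); // anonymous.0
  printf("%s\n", GetNewAnonymousName().c_str()); // anonymous.1
}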
From d61c840fcda18675c131bf86bb6d4498fcb6c272 Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Tue, 26 Feb 2013 22:35:53 +0000
Subject: Revert r176120 as it caused a failure at static-member.cpp

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176129 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index c023afd..93b00fb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1349,10 +1349,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
     // Add linkage name.
     StringRef LinkageName = GV.getLinkageName();
     if (!LinkageName.empty() && isGlobalVariable)
-      // For static member variables, AT_MIPS_linkage_name should be on the
-      // specification DIE to make old gdb work.
-      addString(IsStaticMember && VariableSpecDIE ?
-                VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+      addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
                 getRealLinkageName(LinkageName));
   } else if (const ConstantInt *CI =
              dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
-- cgit v1.1


From ebbb359dd6deb33ce37fe81d491c96d8c32a4509 Mon Sep 17 00:00:00 2001
From: Roman Divacky
Date: Tue, 26 Feb 2013 22:41:01 +0000
Subject: Add support for autodetection of AMD bdver2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176130 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Support/Host.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index afbf0bf..b9bbcb9 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -331,7 +331,10 @@ std::string sys::getHostCPUName() {
     case 20:
       return "btver1";
     case 21:
-      return "bdver1";
+      if (Model <= 15)
+        return "bdver1";
+      else if (Model <= 31)
+        return "bdver2";
     default:
       return "generic";
     }
-- cgit v1.1
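A self-contained sketch of the family-21 decoding this patch adds: models 0-15 are Bulldozer (bdver1), models 16-31 are Piledriver (bdver2), and anything else falls through to "generic". The surrounding switch mirrors sys::getHostCPUName(); the free function and driver are hypothetical.

#include <cstdio>

// Maps an AMD family/model pair to an LLVM CPU name, mirroring the new
// family-21 cases. Note the deliberate fall-through into default for
// family-21 models above 31.
static const char *amdCPUName(unsigned Family, unsigned Model) {
  switch (Family) {
  case 20:
    return "btver1";
  case 21:
    if (Model <= 15)
      return "bdver1";
    else if (Model <= 31)
      return "bdver2";
    // unknown family-21 models fall through
  default:
    return "generic";
  }
}

int main() {
  printf("%s\n", amdCPUName(21, 1));  // bdver1
  printf("%s\n", amdCPUName(21, 16)); // bdver2
  printf("%s\n", amdCPUName(21, 99)); // generic
}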
From 616025007abbb0fb53fe53ef2cac81907668c2f0 Mon Sep 17 00:00:00 2001
From: Michael Ilseman
Date: Tue, 26 Feb 2013 22:51:07 +0000
Subject: Constant fold vector bitcasts of halves similarly to how floats and
 doubles are folded. Test case included.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176131 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/ConstantFolding.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index c99925d..09d7608 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -54,13 +54,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,

   // Handle a vector->integer cast.
   if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
-    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
-    if (CDV == 0)
+    VectorType *VTy = dyn_cast<VectorType>(C->getType());
+    if (VTy == 0)
       return ConstantExpr::getBitCast(C, DestTy);

-    unsigned NumSrcElts = CDV->getType()->getNumElements();
-
-    Type *SrcEltTy = CDV->getType()->getElementType();
+    unsigned NumSrcElts = VTy->getNumElements();
+    Type *SrcEltTy = VTy->getElementType();

     // If the vector is a vector of floating point, convert it to vector of int
     // to simplify things.
@@ -70,9 +69,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
         VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
       // Ask IR to do the conversion now that #elts line up.
       C = ConstantExpr::getBitCast(C, SrcIVTy);
-      CDV = cast<ConstantDataVector>(C);
     }

+    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+    if (CDV == 0)
+      return ConstantExpr::getBitCast(C, DestTy);
+
     // Now that we know that the input value is a vector of integers, just shift
     // and insert them into our result.
     unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
-- cgit v1.1
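The fold's endgame is the same for half, float, and double vectors: reinterpret each lane as an integer, then shift-and-insert the lanes into the destination integer. A plain C++ illustration of that final step for a <4 x half>-sized payload; this is not the LLVM API, just the bit layout, and it assumes little-endian lane order (FoldBitCast itself adjusts the shift for big-endian targets).

#include <cstdint>
#include <cstdio>

// Packs four 16-bit lanes into one 64-bit integer, mirroring FoldBitCast's
// shift-and-insert loop once fp lanes have been reinterpreted as integers.
static uint64_t packLanes(const uint16_t Lanes[4]) {
  const unsigned BitShift = 16; // element allocation size in bits
  uint64_t Result = 0;
  for (int i = 0; i < 4; ++i)
    Result |= (uint64_t)Lanes[i] << (i * BitShift);
  return Result;
}

int main() {
  // 0x3C00 is 1.0 in IEEE half precision, so this models
  // bitcasting <4 x half> <1.0, 0.0, 1.0, 0.0> to i64.
  const uint16_t Lanes[4] = {0x3C00, 0x0000, 0x3C00, 0x0000};
  printf("0x%016llx\n", (unsigned long long)packLanes(Lanes));
}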
From bdc98d3510c1b3d32679ed3188becdc6d6dfd545 Mon Sep 17 00:00:00 2001
From: Michael Ilseman
Date: Tue, 26 Feb 2013 23:15:23 +0000
Subject: Have a way for a target to opt out of target-independent fast isel

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176136 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/FastISel.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 04f5b32..ff9b2ba 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -822,7 +822,7 @@ FastISel::SelectInstruction(const Instruction *I) {
   }

   // First, try doing target-independent selection.
-  if (SelectOperator(I, I->getOpcode())) {
+  if (!SkipTargetIndependentFastISel() && SelectOperator(I, I->getOpcode())) {
     ++NumFastIselSuccessIndependent;
     DL = DebugLoc();
     return true;
-- cgit v1.1
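Only the call site of the hook appears in this diff; the declaration lives in the header, which is outside the 'lib' listing. A sketch of the likely shape of the pattern: the hook name matches the diff, but the class layout, default, and override here are simplified and hypothetical.

#include <cstdio>

// Sketch of a virtual opt-out hook consulted before the generic path runs.
class FastISelSketch {
public:
  virtual ~FastISelSketch() {}
  // Default: targets participate in target-independent fast-isel.
  virtual bool SkipTargetIndependentFastISel() { return false; }

  bool SelectInstruction() {
    // First, try target-independent selection, unless the target opted out.
    if (!SkipTargetIndependentFastISel() && SelectOperator())
      return true;
    // Next, let the target attempt the selection itself.
    return TargetSelectInstruction();
  }

private:
  bool SelectOperator() { return true; }          // stand-in
  bool TargetSelectInstruction() { return true; } // stand-in
};

// A target that prefers its own fast-isel paths exclusively.
class MyTargetFastISel : public FastISelSketch {
public:
  virtual bool SkipTargetIndependentFastISel() { return true; }
};

int main() {
  MyTargetFastISel FIS;
  printf("%d\n", (int)FIS.SelectInstruction()); // 1, via the target hook only
}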
From b3201c5cf1e183d840f7c99ff779d57f1549d8e5 Mon Sep 17 00:00:00 2001
From: Pedro Artigas
Date: Tue, 26 Feb 2013 23:33:20 +0000
Subject: Enhance integer division emulation support to handle types smaller
 than 32 bits.

The enhancement is done the trivial way: by extending the inputs and
truncating the outputs, which is adequate for targets with little or no
support for integer arithmetic on integer types less than 32 bits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176139 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/IntegerDivision.cpp | 104 +++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

(limited to 'lib')

diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 5187d7c..3cb8ded 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -418,3 +418,107 @@ bool llvm::expandDivision(BinaryOperator *Div) {

   return true;
 }
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little support for smaller than 32 bit integer
+/// arithmetic.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+         "Trying to expand remainder from a non-remainder function");
+
+  Type *RemTy = Rem->getType();
+  if (RemTy->isVectorTy())
+    llvm_unreachable("Rem over vectors not supported");
+
+  unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+  if (RemTyBitWidth > 32)
+    llvm_unreachable("Rem of bitwidth greater than 32 not supported");
+
+  if (RemTyBitWidth == 32)
+    return expandRemainder(Rem);
+
+  // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // with 32 bit division.
+  IRBuilder<> Builder(Rem);
+
+  Value *ExtDividend;
+  Value *ExtDivisor;
+  Value *ExtRem;
+  Value *Trunc;
+  Type *Int32Ty = Builder.getInt32Ty();
+
+  if (Rem->getOpcode() == Instruction::SRem) {
+    ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+    ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+  } else {
+    ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+    ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+  }
+  Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+  Rem->replaceAllUsesWith(Trunc);
+  Rem->dropAllReferences();
+  Rem->eraseFromParent();
+
+  return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division function");
+
+  Type *DivTy = Div->getType();
+  if (DivTy->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+  if (DivTyBitWidth > 32)
+    llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+  if (DivTyBitWidth == 32)
+    return expandDivision(Div);
+
+  // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+  // with 32 bit division.
+  IRBuilder<> Builder(Div);
+
+  Value *ExtDividend;
+  Value *ExtDivisor;
+  Value *ExtDiv;
+  Value *Trunc;
+  Type *Int32Ty = Builder.getInt32Ty();
+
+  if (Div->getOpcode() == Instruction::SDiv) {
+    ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+    ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+  } else {
+    ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+    ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+  }
+  Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+  Div->replaceAllUsesWith(Trunc);
+  Div->dropAllReferences();
+  Div->eraseFromParent();
+
+  return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
-- cgit v1.1
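The IR-level rewrite above is just widening arithmetic. In plain C++, the i16 srem case the pass now handles reduces to the following behavioral model (the function name and driver are hypothetical; this models the transform's semantics, not the IR rewrite itself).

#include <cassert>
#include <cstdint>

// Behavioral model of expandRemainderUpTo32Bits on an i16 srem: sign-extend
// both operands to 32 bits, take the 32-bit remainder (which the existing
// 32-bit expansion can emulate), then truncate back to 16 bits.
static int16_t srem16ViaExtension(int16_t Dividend, int16_t Divisor) {
  int32_t ExtDividend = (int32_t)Dividend; // SExt
  int32_t ExtDivisor = (int32_t)Divisor;   // SExt
  int32_t ExtRem = ExtDividend % ExtDivisor;
  return (int16_t)ExtRem;                  // Trunc
}

int main() {
  assert(srem16ViaExtension(7, 3) == 1);
  assert(srem16ViaExtension(-7, 3) == -1); // like srem, sign follows dividend
  assert(srem16ViaExtension(7, -3) == 1);
  return 0;
}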